from datetime import datetime, timedelta
import json
import os
import re
import shutil
import logging

from requests.auth import HTTPBasicAuth
from requests.sessions import Session
from requests.exceptions import HTTPError

from daisy import DaisyHandler


class PackageException(Exception):
    """Raised when an incoming package cannot be interpreted.

    Attributes:
        package: the offending package data (as passed by the raiser).
        problem: a short description of what is wrong with it.
    """

    def __init__(self, package, problem):
        super().__init__(package, problem)
        self.package = package
        self.problem = problem


class Mediasite:
    """Packager that downloads a presentation's files from mediasite."""

    def __init__(self, config):
        """Read the incoming directory and mediasite credentials from
        config.
        """
        self.incoming = config['daemon']['incoming']
        self.auth = HTTPBasicAuth(config['mediasite']['user'],
                                  config['mediasite']['password'])
        # 10MiB; seems optimal for speed
        # Tested 8k, 10MiB and 20MiB
        self.chunk_size = 10485760
        self.logger = logging.getLogger('play-daemon')

    def pack(self, pres_id, queue_item):
        """Download all files for a presentation and describe them.

        A fresh directory named after pres_id is created under the
        incoming directory (any existing one is removed first), the
        thumbnail, video streams, posters and slides are downloaded into
        it, and a package dict referring to the local file names is
        returned.

        Streams that fail to download with an HTTP error are skipped.
        A missing thumbnail or poster is left as an empty string.
        """
        data = queue_item['data']
        base = os.path.join(self.incoming, pres_id)
        if os.path.exists(base):
            shutil.rmtree(base)
        os.mkdir(base)

        # Keep only the part before the '@' of each presenter entry
        presenters = [item.split('@')[0] for item in data['presenters']]

        mypackage = {'id': pres_id,
                     'base': base,
                     'origin': queue_item['type'],
                     'creation': data['created'],
                     'title': data['title'],
                     'description': data.get('description', ''),
                     'thumb': '',
                     'presenters': presenters,
                     'courses': data['courses'],
                     'visibility': data.get('visibility', ''),
                     # presumably ms -> s; verify against the producer
                     'duration': data['duration']/1000,
                     'tags': data['tags'],
                     'sources': []}
        if 'id' in data:
            mypackage['notification_id'] = data['id']

        # Create one session for all the downloads
        with Session() as session:
            session.auth = self.auth
            session.stream = True
            try:
                mypackage['thumb'] = self._download(base,
                                                    data['thumb'],
                                                    session)
            except HTTPError:
                # Missing thumb can be generated
                pass

            # Download video sources, and store the information with a
            # local file path in mypackage for the transcoder
            stream_index = 0
            for source in data['sources']:
                mysource = {'video': None,
                            'name': source.get('name', stream_index),
                            'poster': '',
                            'playAudio': source['playAudio']}
                try:
                    mysource['video'] = self._download(base,
                                                       source['video'],
                                                       session)
                except HTTPError:
                    # Mediasite has lost the stream, skip it.
                    m = '%s - Skipped missing mediasite stream: %s'
                    self.logger.info(m, pres_id, source['video'])
                    continue
                try:
                    mysource['poster'] = self._download(base,
                                                        source['poster'],
                                                        session)
                except HTTPError:
                    # Missing poster can be created later
                    pass
                mypackage['sources'].append(mysource)
                # Only streams that were actually kept are counted
                stream_index += 1

            # An empty slide list cannot produce a slide stream (there
            # would be no last slide or poster), so treat it like
            # a missing 'slides' key.
            if data.get('slides'):
                # Slides exist, create a package for creating
                # a video from the slides
                try:
                    mypackage['sources'].append(
                        self._download_slides(base, data, session))
                except HTTPError:
                    # Another lost stream, nothing to do but ignore
                    # and continue
                    m = '%s - Skipped missing mediasite slides'
                    self.logger.info(m, pres_id)
        return mypackage

    # function to download the material from mediasite
    def _download(self, base, remotefile, session):
        """Download remotefile into the directory base.

        The local file is named after the last path component of
        remotefile. Returns that local file name (not the full path).

        Raises HTTPError if the server answers with an error status, and
        FileExistsError if the local file already exists (the file is
        opened in exclusive-create mode).
        """
        localname = remotefile.split('/')[-1]
        localpath = os.path.join(base, localname)
        # The session streams responses, so the response must be closed
        # explicitly; otherwise an error status would leave the
        # connection with its unread body checked out.
        with session.get(remotefile) as r:
            r.raise_for_status()
            with open(localpath, 'xb') as f:
                for chunk in r.iter_content(chunk_size=self.chunk_size):
                    f.write(chunk)
        return localname

    def _download_slides(self, base, data, session):
        """Download all slides and write an ffmpeg concat demuxer file.

        Creates the directory 'slides' under base, downloads every entry
        of data['slides'] into it and writes 'demux.txt' listing each
        slide with its display duration. Returns a source dict for the
        transcoder. data['slides'] must not be empty.

        Raises HTTPError if any slide fails to download.
        """
        # https://trac.ffmpeg.org/wiki/Slideshow
        slides_path = os.path.join(base, 'slides')
        os.mkdir(slides_path)
        slides = []
        demux_file = os.path.join(slides_path, 'demux.txt')
        all_slides = data['slides']
        last_index = len(all_slides) - 1
        with open(demux_file, 'w') as f:
            f.write('ffconcat version 1.0\n')
            # loop all slides and download, calculate the duration and
            # create a text file holding all the info for the
            # ffmpeg demuxer
            for i, slide in enumerate(all_slides):
                # Download the source file
                # and store the url as a local file path
                slide_name = self._download(slides_path,
                                            slide['url'],
                                            session)
                slide_path = os.path.join(slides_path, slide_name)

                # Handle the different edge cases
                # for individual slide duration.
                # NOTE(review): slide['duration'] looks like the offset
                # at which the slide appears, not its length - confirm.
                if i == last_index:
                    # last slide
                    duration = data['duration'] - int(slide['duration'])
                elif i == 0:
                    # first slide
                    duration = all_slides[i + 1]['duration']
                else:
                    duration = (int(all_slides[i + 1]['duration'])
                                - int(slide['duration']))

                # Commit to the demuxfile.
                f.write(f"file '{slide_path}'\n")
                # The format assumes seconds, so we specify ms.
                # https://ffmpeg.org/ffmpeg-utils.html#Time-duration
                f.write(f'duration {duration}ms\n')
                slides.append({'url': slide_path,
                               'duration': f'{duration}ms'})
            # to accommodate for an ffmpeg quirk that needs
            # the last slide twice
            f.write(f"file '{slides[-1]['url']}'\n")

        # put all the slides info in mypackage for
        # the transcoder to rework into a mp4 video
        return {'demux_file': demux_file,
                'name': 'slide',
                'poster': slides[0]['url'],
                'playAudio': False}


class Manual:
    """Packager for manually submitted presentations."""

    def pack(self, pres_id, queue_item):
        """Build a package dict from the queue item's data.

        No files are touched; sources are named by their position in
        data['sources'].
        """
        data = queue_item['data']
        mypackage = {'id': pres_id,
                     'base': data['base'],
                     'origin': queue_item['type'],
                     'notification_id': data['id'],
                     'creation': data['created'],
                     'title': data['title'],
                     'description': data.get('description', ''),
                     'presenters': data['presenters'],
                     'courses': data['courses'],
                     'duration': data['duration'],
                     'thumb': data.get('thumb', ''),
                     'tags': data['tags'],
                     'subtitles': data.get('subtitles', ''),
                     'sources': []}
        for stream_index, source in enumerate(data['sources']):
            mypackage['sources'].append(
                {'video': source['video'],
                 'name': stream_index,
                 'poster': source.get('poster', ''),
                 'playAudio': source['playAudio']})
        return mypackage


class Cattura:
    """Packager for recordings published by a cattura recorder."""

    def __init__(self, config):
        """Set up the recorder mapping and the daisy handler from
        config.
        """
        self.recorders = config['recorders']
        self.daisy = DaisyHandler(config)
        # Splits 'sftp://host/some/dir/file' into host part, directory
        # and file name
        self.path_regex = re.compile(r'^(sftp://[^/]+/)(.+?)/([^/]+)$')

    def pack(self, pres_id, queue_item):
        """Build a package dict from a published cattura mediapackage.

        The mediapackage file referenced in the queue item is read from
        the local file system, its 'media/' outputs become sources, and
        title, presenters, description and courses are filled in from a
        matching daisy booking if one is found.

        Raises PackageException if no mediapackage element is present or
        its path does not have the expected form.
        """
        data = queue_item['data']
        recorder = queue_item['recorder']
        info = self._find_packageinfo(pres_id, data['publishedOutputs'])
        name = info['name']
        rawpath = info['path']
        match = self.path_regex.match(rawpath)
        if match is None:
            raise PackageException(json.dumps(info),
                                   'cannot parse mediapackage path')
        path, pkgfile = match.group(2, 3)
        with open(os.path.join(path, pkgfile)) as f:
            mediapackage = json.load(f)

        mypackage = {'id': pres_id,
                     'base': path,
                     'origin': 'cattura',
                     'creation': 0,
                     'title': 'Unknown title',
                     'description': '',
                     'presenters': [],
                     'courses': [],
                     'duration': 0,
                     'thumb': '',
                     'tags': [],
                     'sources': []}

        for key, media in mediapackage['outputs'].items():
            if not key.startswith('media/'):
                continue
            poster = ''
            # The package format seems to not be consistent enough
            # for this
            #richfile = self._find_enrichment(
            #    media['element']['video']['sourceID'], mediapackage)
            #if richfile:
            #    with open(os.path.join(path, richfile)) as f:
            #        enrichment = json.load(f)
            #    poster = self._find_poster(enrichment)
            source = {'name': media['element']['name'],
                      'video': media['file'],
                      'poster': poster,
                      'playAudio': False}
            if source['name'] == 'main':
                # presumably both values are in ms - TODO confirm
                end = int(media['element']['creationDate'] / 1000)
                dur = media['element']['duration']['timestamp'] / 1000
                mypackage['duration'] = dur
                mypackage['creation'] = int(end - dur)
            # The primary stream doesn't get tagged with its configured
            # name, but instead gets the presentation name.
            # The camera is the primary, so we play the sound from there
            # for sync purposes
            elif source['name'] == name:
                source['playAudio'] = True
            mypackage['sources'].append(source)

        mypackage['tags'].append(
            self.daisy.get_room_name(self.recorders[recorder]))
        start = datetime.fromtimestamp(mypackage['creation'])
        end = start + timedelta(seconds=mypackage['duration'])
        booking = self.daisy.get_booking(start, end,
                                         self.recorders[recorder])
        if booking is not None:
            title = {'sv': booking['displayStringLong']['swedish'],
                     'en': booking['displayStringLong']['english']}
            mypackage['title'] = title
            mypackage['presenters'] = booking['teachers']
            if booking['description']:
                mypackage['description'] = booking['description']
            # Fall back to whoever made the booking when no teacher
            # is listed
            if not mypackage['presenters'] and booking['bookedBy']:
                mypackage['presenters'].append(booking['bookedBy'])
            mypackage['courses'] = booking['courseSegmentInstances']
            # This is just an int and there is no way to get a string:
            #mypackage['tags'].append(booking['educationalType'])
        return mypackage

    def _find_packageinfo(self, pres_id, data):
        """Return the first value in data whose key starts with
        'mediapackage:'.

        Raises PackageException if there is no such key.
        """
        for key, value in data.items():
            if key.startswith('mediapackage:'):
                return value
        raise PackageException(json.dumps(data),
                               'cannot find mediapackage element')

    def _find_enrichment(self, sourceid, mediapackage):
        """Return the file of the 'toc:' output matching sourceid, or ''
        if the mediapackage has no such output.

        sourceid must contain exactly one comma; the part after it is
        used as the id.
        """
        _, splitid = sourceid.split(',')
        tocid = 'toc:' + splitid
        try:
            return mediapackage['outputs'][tocid]['file']
        except KeyError:
            return ''

    def _find_poster(self, enrichment):
        """Return the screenshot path of the first entry at timestamp 0,
        or '' if there is none.
        """
        for frame in enrichment['entries']:
            if frame['timestamp']['timestamp'] == 0:
                return frame['screenshot']['path']
        return ''


class Update:
    """Packager for updates to an already existing presentation."""

    def pack(self, temp_id, queue_item):
        """Build a package dict from the queue item's data.

        The package gets temp_id as its id and keeps the id from the
        data as 'orig_id'. No files are touched.
        """
        data = queue_item['data']
        sources = [{'video': source['video'],
                    'name': source['name'],
                    'poster': source.get('poster', ''),
                    'playAudio': source['playAudio']}
                   for source in data['sources']]
        return {'id': temp_id,
                'orig_id': data['id'],
                'base': data['base'],
                'origin': queue_item['type'],
                'creation': data['created'],
                'title': data['title'],
                'description': data.get('description', ''),
                'presenters': data['presenters'],
                'courses': data['courses'],
                'duration': data['duration'],
                'thumb': data.get('thumb', ''),
                'tags': data['tags'],
                'subtitles': data.get('subtitles', ''),
                'sources': sources}