Split subtitles handling into two handlers.
One for generation and one for import.
This commit is contained in:
parent
2da06f84a7
commit
10c126a37c
@ -57,7 +57,7 @@ token = A70keN
|
||||
# size and one other job of the next smaller size. Otherwise no jobs will run.
|
||||
#
|
||||
# See also TranscodeHandler.jobsize and SubtitlesHandler.jobsize
|
||||
capacity = 100
|
||||
capacity = 20
|
||||
|
||||
|
||||
[Daisy]
|
||||
@ -72,7 +72,7 @@ url = ldaps://ldap.example.com
|
||||
base_dn = dc=example,dc=com
|
||||
|
||||
|
||||
[SubtitlesHandler]
|
||||
[SubtitlesWhisperHandler]
|
||||
# The whisper model to use for subtitle generation
|
||||
whispermodel = large-v2
|
||||
|
||||
@ -83,7 +83,7 @@ modeldir = /some/path
|
||||
# The amount of resources a single whisper job will consume
|
||||
# in the worker pool. This should be set so that the server can handle the
|
||||
# pool getting completely filled with jobs of this type.
|
||||
jobsize = 25
|
||||
jobsize = 5
|
||||
|
||||
|
||||
[ThumbnailHandler]
|
||||
@ -105,7 +105,7 @@ encoder = software
|
||||
# The amount of resources a single transcode job will consume
|
||||
# in the worker pool. This should be set so that the server can handle the
|
||||
# pool getting completely filled with jobs of this type.
|
||||
jobsize = 5
|
||||
jobsize = 2
|
||||
|
||||
|
||||
[MediasiteProcessor]
|
||||
|
@ -5,7 +5,8 @@ from .handler import Handler
|
||||
from .metadata import MetadataHandler
|
||||
from .poster import PosterHandler
|
||||
from .slides import SlidesHandler
|
||||
from .subtitles import SubtitlesHandler
|
||||
from .subtitles_whisper import SubtitlesWhisperHandler
|
||||
from .subtitles_import import SubtitlesImportHandler
|
||||
from .thumbnail import ThumbnailHandler
|
||||
from .transcode import TranscodeHandler
|
||||
from ..ldap import Ldap
|
||||
@ -15,7 +16,8 @@ allHandlers = [AudioHandler,
|
||||
MetadataHandler,
|
||||
PosterHandler,
|
||||
SlidesHandler,
|
||||
SubtitlesHandler,
|
||||
SubtitlesImportHandler,
|
||||
SubtitlesWhisperHandler,
|
||||
ThumbnailHandler,
|
||||
TranscodeHandler,
|
||||
]
|
||||
|
70
pipeline/handlers/subtitles_import.py
Normal file
70
pipeline/handlers/subtitles_import.py
Normal file
@ -0,0 +1,70 @@
|
||||
import logging
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from .handler import Handler
|
||||
from ..exceptions import ValidationException
|
||||
|
||||
|
||||
@Handler.register
class SubtitlesImportHandler(Handler):
    """
    This class saves uploaded subtitles to a package.

    Only the 'subtitles' key of a job specification is processed here.
    The 'generate_subtitles' key is read during validation solely to
    detect track names that are used under both keys.
    """
    @classmethod
    def wants(cls, jobspec, existing_package):
        """
        Return True if this handler wants to process this jobspec.
        Raises an exception if the job is wanted but doesn't pass validation.

        A job is wanted if the job specification contains a 'subtitles' key.
        """
        if 'subtitles' in jobspec:
            return cls._validate(jobspec, existing_package)
        return False

    @classmethod
    def _validate(cls, jobspec, existing_package):
        """
        Return True if the job is valid for this handler.

        Raises a ValidationException if any requirement is violated.

        Validity requirements are:
        - Keys in 'subtitles' and 'generate_subtitles' must be
          mutually unique.
        - If any value in the 'subtitles' object is truthy, the job must
          contain an 'upload_dir' key.
        - All truthy 'subtitles' values must be relative paths without
          '..' components, so that they cannot point outside 'upload_dir'.
        - All truthy 'subtitles' values must be existing files
          under 'upload_dir'.
        """
        super()._validate(jobspec, existing_package)

        subtitles = jobspec.get('subtitles', {})

        # Check for duplicate track names
        generate_names = jobspec.get('generate_subtitles', {}).keys()
        common_names = generate_names & subtitles.keys()
        if common_names:
            # Sorted so that the error message is stable between runs;
            # the intersection is a set and has no defined order.
            names_string = ', '.join(sorted(common_names))
            raise ValidationException(
                f"Duplicate subtitle track name(s): {names_string}")

        # Validate storage tasks
        for name, subsfile in subtitles.items():
            if not subsfile:
                # No file to check for this track.
                continue
            if 'upload_dir' not in jobspec:
                raise ValidationException("upload_dir missing")

            # Joining an absolute path discards the left-hand side, and
            # '..' components climb out of it, so both would let a job
            # reference files outside upload_dir. The check is lexical
            # on purpose: symlinks inside upload_dir are not resolved.
            relative = Path(subsfile)
            if relative.is_absolute() or '..' in relative.parts:
                raise ValidationException(
                    f"Error for subtitle track {name}: "
                    f"{subsfile} is not located under upload_dir")

            subspath = Path(jobspec['upload_dir']) / relative
            if not subspath.is_file():
                raise ValidationException(
                    f"Error for subtitle track {name}: "
                    f"{subspath} is not a valid file")
        return True

    def _handle(self, jobspec, existing_package, tempdir):
        """
        Return a function to apply changes to the stored package.

        No work is done before the function is returned; existing_package
        and tempdir are not used by this handler.
        """
        def apply_func(package):
            """
            Call package.set_subtitle_track once per 'subtitles' entry.

            Truthy values are passed as a path under 'upload_dir', falsy
            values are passed as None.
            NOTE(review): passing None presumably removes the track --
            confirm against set_subtitle_track.
            """
            for name, subsfile in jobspec.get('subtitles', {}).items():
                subspath = None
                if subsfile:
                    subspath = Path(jobspec['upload_dir']) / subsfile
                package.set_subtitle_track(name, subspath)
        return apply_func
|
@ -16,7 +16,8 @@ def _do_whisper_transcribe(inpath,
|
||||
device,
|
||||
modelname,
|
||||
modeldir,
|
||||
language=None):
|
||||
loglevel,
|
||||
language=None,):
|
||||
"""
|
||||
Transcribe the given file at 'inpath' to a VTT file at 'outpath'
|
||||
using the Whisper engine.
|
||||
@ -25,7 +26,8 @@ def _do_whisper_transcribe(inpath,
|
||||
"""
|
||||
|
||||
logger = logging.getLogger(
|
||||
'play-daemon.SubtitlesHandler._do_whisper_transcribe')
|
||||
'play-daemon.SubtitlesWhisperHandler._do_transcribe')
|
||||
logger.setLevel(loglevel)
|
||||
logger.info(f'Starting whisper transcription job for {inpath}.')
|
||||
try:
|
||||
whisperModel = whisper.load_model(
|
||||
@ -57,14 +59,13 @@ def _do_whisper_transcribe(inpath,
|
||||
elapsed = time.strftime('%H:%M:%S', time.gmtime(end - start))
|
||||
logger.info('Finished whisper transcription job '
|
||||
f'for {inpath} in {elapsed}.')
|
||||
[handler.flush() for handler in logger.handlers]
|
||||
return (outpath, out_language)
|
||||
|
||||
|
||||
@Handler.register
|
||||
class SubtitlesHandler(Handler):
|
||||
class SubtitlesWhisperHandler(Handler):
|
||||
"""
|
||||
This class handles package subtitles.
|
||||
This class handles subtitle generation with Whisper.
|
||||
"""
|
||||
def __init__(self,
|
||||
handlerqueue,
|
||||
@ -84,7 +85,7 @@ class SubtitlesHandler(Handler):
|
||||
self.whispermodel = config['whispermodel']
|
||||
self.modeldir = config['modeldir']
|
||||
self.device = device
|
||||
self.logger.debug(f'Created SubtitlesHandler on {device}')
|
||||
self.logger.debug(f'Created SubtitlesWhisperHandler on {device}')
|
||||
|
||||
@classmethod
|
||||
def instantiate(cls,
|
||||
@ -95,7 +96,7 @@ class SubtitlesHandler(Handler):
|
||||
tempdir,
|
||||
config):
|
||||
"""
|
||||
Returns a list SubtitlesHandlers.
|
||||
Returns a list of SubtitlesWhisperHandlers.
|
||||
|
||||
Instantiation behaviour is governed by two configuration values:
|
||||
device and count. Both are optional.
|
||||
@ -153,10 +154,10 @@ class SubtitlesHandler(Handler):
|
||||
Return True if this handler wants to process this jobspec.
|
||||
Raises an exception if the job is wanted but doesn't pass validation.
|
||||
|
||||
A job is wanted if the job specification contains a 'subtitles' or a
|
||||
'generate_subtitles' key.
|
||||
A job is wanted if the job specification contains
|
||||
a 'generate_subtitles' key.
|
||||
"""
|
||||
if 'subtitles' in jobspec or 'generate_subtitles' in jobspec:
|
||||
if 'generate_subtitles' in jobspec:
|
||||
return cls._validate(jobspec, existing_package)
|
||||
return False
|
||||
|
||||
@ -168,11 +169,6 @@ class SubtitlesHandler(Handler):
|
||||
Validity requirements are:
|
||||
- Keys in 'subtitles' and 'generate_subtitles' must be
|
||||
mutually unique.
|
||||
- If any value in the 'subtitles' object is not None, the job must
|
||||
contain an 'upload_dir' key which must point to an
|
||||
existing directory.
|
||||
- All 'subtitles' values that are not None must be existing files
|
||||
under 'upload_dir'.
|
||||
- All 'source' values in subtitle generation specifications must be a
|
||||
valid source name, either one that already exists or one provided
|
||||
under 'sources' in this job.
|
||||
@ -203,27 +199,15 @@ class SubtitlesHandler(Handler):
|
||||
raise ValidationException(f"Subtitle track '{name}' refers "
|
||||
"to a missing source: "
|
||||
f"{expected_source}")
|
||||
|
||||
# Validate storage tasks
|
||||
for name, subsfile in jobspec.get('subtitles', {}).items():
|
||||
if not subsfile:
|
||||
continue
|
||||
if 'upload_dir' not in jobspec:
|
||||
raise ValidationException("upload_dir missing")
|
||||
subspath = Path(jobspec['upload_dir']) / subsfile
|
||||
if not subspath.is_file():
|
||||
raise ValidationException(
|
||||
f"Error for subtitle track {name}: "
|
||||
f"{subspath} is not a valid file")
|
||||
return True
|
||||
|
||||
def _handle(self, jobspec, existing_package, tempdir):
|
||||
"""
|
||||
Return a function to apply changes to the stored package.
|
||||
|
||||
Any subtitle generation tasks are run before apply_func is returned.
|
||||
The returned function moves subtitle files into the package's basedir
|
||||
and updates the package metadata.
|
||||
All subtitle generation tasks are run before apply_func is returned.
|
||||
The returned function moves generated subtitle files into the
|
||||
package's basedir and updates the package metadata.
|
||||
Replaced subtitle tracks are deleted.
|
||||
"""
|
||||
|
||||
@ -233,6 +217,7 @@ class SubtitlesHandler(Handler):
|
||||
|
||||
for trackname, item in jobspec.get('generate_subtitles', {}).items():
|
||||
sourcename = item['source']
|
||||
generated_name = sourcename.replace('_', '__').replace(' ', '_')
|
||||
language = item.get('language', None)
|
||||
sourcepath = None
|
||||
source_from_job = jobspec.get('sources', {}).get(sourcename, {})
|
||||
@ -250,7 +235,8 @@ class SubtitlesHandler(Handler):
|
||||
sourcefile = existing_source['video'][resolutions_sorted[0]]
|
||||
sourcepath = basedir / sourcefile
|
||||
|
||||
outpath = tempdir / f"{sourcename}.vtt"
|
||||
generated_name = sourcename.replace('_', '__').replace(' ', '_')
|
||||
outpath = tempdir / f"{generated_name}.vtt"
|
||||
|
||||
transcribe = self.asyncjob(self.jobsize,
|
||||
_do_whisper_transcribe,
|
||||
@ -258,7 +244,8 @@ class SubtitlesHandler(Handler):
|
||||
outpath,
|
||||
self.device,
|
||||
self.whispermodel,
|
||||
self.modeldir),
|
||||
self.modeldir,
|
||||
loglevel),
|
||||
{'language': language})
|
||||
transcribes.append(transcribe)
|
||||
resultfiles[trackname] = outpath
|
||||
@ -270,12 +257,6 @@ class SubtitlesHandler(Handler):
|
||||
self.logger.info("Done, making apply_func")
|
||||
|
||||
def apply_func(package):
|
||||
for name, subsfile in jobspec.get('subtitles', {}).items():
|
||||
subspath = None
|
||||
if subsfile:
|
||||
subspath = Path(jobspec['upload_dir']) / subsfile
|
||||
package.set_subtitle_track(name, subspath)
|
||||
|
||||
for name, subspath in resultfiles.items():
|
||||
package.set_subtitle_track(name, subspath)
|
||||
|
Loading…
x
Reference in New Issue
Block a user