whisper-daemon/pipeline/whisperprocess.py

90 lines
2.9 KiB
Python

import logging
import os
import time
from pathlib import Path
from queue import Empty, Queue
import whisper
import whisper.utils
def queue_pickup(highqueue: Queue, lowqueue: Queue):
    """Fetch the next item without blocking, preferring ``highqueue``.

    The high-priority queue is polled first; only when it is empty is the
    low-priority queue consulted.

    Raises:
        Empty: If neither queue has an item available.
    """
    try:
        urgent = highqueue.get_nowait()
    except Empty:
        # Nothing urgent waiting; fall through to the low-priority queue.
        pass
    else:
        return urgent
    return lowqueue.get_nowait()
def _transcribe_job(job,
                    whisperModel,
                    output_base_dir: Path,
                    logger: logging.Logger) -> Path:
    """Transcribe one job's input file and write the result to disk.

    Reads ``job.jobid``, ``job.language``, ``job.jobfile`` and
    ``job.outputformat``. The output goes into ``output_base_dir / jobid``
    and is named after the input file with the writer's extension.

    Returns:
        The path of the written transcription file.

    Raises:
        Exception: Anything raised while creating the directory, building
            the writer, transcribing or writing propagates to the caller.
    """
    jobid = job.jobid
    language = job.language
    # NOTE(review): jobfile must support .with_suffix(); presumably a Path.
    inpath = job.jobfile

    outputdir = output_base_dir / jobid
    # Tolerate a directory that already exists (or a missing base directory)
    # instead of raising.
    outputdir.mkdir(parents=True, exist_ok=True)

    writer = whisper.utils.get_writer(job.outputformat, outputdir)
    if job.outputformat == 'vtt':
        writer.always_include_hours = True
    outpath = outputdir / inpath.with_suffix('.' + writer.extension).name
    logger.debug('%s - inpath=%s, outpath=%s', jobid, inpath, outpath)

    logger.info('%s - Starting whisper transcription', jobid)
    # Monotonic clock: the measured duration is unaffected by system clock
    # adjustments made while the transcription is running.
    start = time.monotonic()
    result = whisper.transcribe(whisperModel,
                                str(inpath),
                                language=language,
                                word_timestamps=True)
    duration = time.monotonic() - start

    if language is None:
        # No language was requested, so report the one whisper settled on.
        logger.info("%s - Detected language '%s'",
                    jobid, result['language'])

    # Explicit UTF-8 so non-ASCII transcripts do not depend on the locale's
    # default encoding.
    with open(outpath, 'w', encoding='utf-8') as f:
        writer.write_result(result, f, {'max_line_width': None,
                                        'max_line_count': 3,
                                        'max_words_per_line': 15,
                                        'highlight_words': False})

    elapsed = time.strftime('%H:%M:%S', time.gmtime(duration))
    logger.info('%s - Transcription finished in %s', jobid, elapsed)
    return outpath


def whisper_process(highqueue: Queue,
                    lowqueue: Queue,
                    outqueue: Queue,
                    output_base_dir: Path,
                    modelname: str,
                    modeldir: Path,
                    loglevel: int,
                    device: str):
    """Worker loop: load a whisper model and transcribe queued jobs forever.

    Jobs are taken from ``highqueue`` first and from ``lowqueue`` only when
    the former is empty. Each job is marked finished (``job.finish(outpath)``)
    or failed (``job.fail(exc)``) and then put on ``outqueue``. This function
    never returns.

    Args:
        highqueue: Queue of jobs that are picked up first.
        lowqueue: Queue of jobs picked up when ``highqueue`` is empty.
        outqueue: Queue that receives every processed job, whether it
            finished or failed.
        output_base_dir: Directory under which one subdirectory per job id
            is created for the output.
        modelname: Name of the whisper model to load.
        modeldir: Passed to ``whisper.load_model`` as ``download_root``.
        loglevel: Logging level applied to this worker's logger.
        device: ``'cpu'`` to run on the CPU. Any other value is exported as
            ``CUDA_VISIBLE_DEVICES`` and the model is loaded on ``'cuda'``.
    """
    logger = logging.getLogger(
        f'whisper-daemon.whisper_process[{os.getpid()}]')
    logger.setLevel(loglevel)
    logger.info('Starting on %s', device)

    rundevice = 'cpu'
    if device != 'cpu':
        # Restrict the visible CUDA devices to the requested one(s) before
        # the model is loaded.
        os.environ['CUDA_VISIBLE_DEVICES'] = device
        rundevice = 'cuda'

    whisperModel = whisper.load_model(modelname,
                                      download_root=modeldir,
                                      device=rundevice)
    logger.debug('Model loaded, now processing jobs')

    while True:
        try:
            job = queue_pickup(highqueue, lowqueue)
        except Empty:
            # Both queues are empty; wait a moment before polling again.
            time.sleep(1)
            continue

        jobid = job.jobid
        logger.debug('%s - Picked up job', jobid)

        # Everything job-specific runs inside the try block, so a bad job
        # (unsupported output format, unwritable directory, transcription
        # error, ...) is reported as failed rather than taking down the worker.
        try:
            outpath = _transcribe_job(job, whisperModel,
                                      output_base_dir, logger)
            job.finish(outpath)
        except Exception as e:
            logger.exception('%s - Transcription failed', jobid)
            job.fail(e)

        outqueue.put(job)