Module elpis.transcriber
Expand source code
from elpis.transcriber.results import build_elan, build_text
from elpis.transcriber.transcribe import build_pipeline, transcribe
__all__ = ["build_elan", "build_text", "build_pipeline", "transcribe"]
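Taken together, these exports cover the inference flow end to end: build a pipeline from a pretrained model, transcribe an audio file, then serialise the resulting annotations as plain text or as an ELAN file. A minimal sketch of that flow (the model name and file paths below are placeholders, not part of this package):

from pathlib import Path

from elpis.transcriber import build_elan, build_pipeline, build_text, transcribe

# Placeholder model name and paths -- substitute your own.
asr = build_pipeline("facebook/wav2vec2-base-960h")
annotations = transcribe(Path("recording.wav"), asr)

print(build_text(annotations))                    # combined plain-text transcript
build_elan(annotations).to_file("recording.eaf")  # write an ELAN (.eaf) file via pympi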
Sub-modules
elpis.transcriber.results
elpis.transcriber.transcribe
Functions
def build_elan(annotations: List[Annotation], tier_name: str = 'Phrase') -> pympi.Elan.Eaf
Builds an elan file from the given annotations and tier_name.
Parameters
annotations: The list of annotations to add.
tier_name: The name of the tier to add the annotations under.
Returns
The resulting elan file.
Expand source code
def build_elan(annotations: List[Annotation], tier_name: str = "Phrase") -> Eaf:
    """Builds an elan file from the given annotations and tier_name.

    Parameters:
        annotations: The list of annotations to add.
        tier_name: The name of the tier to add the annotations under.

    Returns:
        The resulting elan file.
    """
    result = Eaf()
    result.add_tier(tier_name)

    for annotation in annotations:
        result.add_annotation(
            id_tier=tier_name,
            start=annotation.start_ms,
            end=annotation.stop_ms,
            value=annotation.transcript,
        )

    return result
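A short usage sketch, assuming the annotations were produced by transcribe() below; the tier name and output path are placeholders. pympi's Eaf.to_file() writes the result to disk:

# `annotations` as returned by transcribe(); tier_name is whatever tier label you want in ELAN.
eaf = build_elan(annotations, tier_name="Utterance")
eaf.to_file("session.eaf")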
def build_pipeline(pretrained_location: str, processor_location: Optional[str] = None, cache_dir: Optional[pathlib.Path] = None) -> transformers.pipelines.automatic_speech_recognition.AutomaticSpeechRecognitionPipeline
Builds the pipeline from the supplied pretrained location.
Parameters
pretrained_location: A huggingface model name, or local path to the pretrained model.
processor_location: The model name or path for the processor; defaults to pretrained_location when omitted.
cache_dir: The directory in which to store temporary files.
Returns
A pipeline to be used for ASR.
Expand source code
def build_pipeline(
    pretrained_location: str,
    processor_location: Optional[str] = None,
    cache_dir: Optional[Path] = None,
) -> ASRPipeline:
    """Builds the pipeline from the supplied pretrained location.

    Parameters:
        pretrained_location: A huggingface model name, or local path to the pretrained model.
        processor_location: The model name or path for the processor; defaults to pretrained_location.
        cache_dir: The directory in which to store temporary files.

    Returns:
        A pipeline to be used for ASR.
    """
    if processor_location is None:
        processor_location = pretrained_location

    processor = AutoProcessor.from_pretrained(processor_location, cache_dir=cache_dir)
    model = AutoModelForCTC.from_pretrained(
        pretrained_location,
        cache_dir=cache_dir,
        pad_token_id=processor.tokenizer.pad_token_id,
    )

    return pipeline(
        task=TASK,
        model=model,
        tokenizer=processor.tokenizer,
        feature_extractor=processor.feature_extractor,
    )  # type: ignore
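A usage sketch, assuming a locally fine-tuned CTC model; the local path and the processor model name are placeholders:

from pathlib import Path

asr = build_pipeline(
    pretrained_location="models/my-finetuned-wav2vec2",  # local directory or Hugging Face model name
    processor_location="facebook/wav2vec2-base-960h",    # omit to reuse pretrained_location
    cache_dir=Path("/tmp/hf-cache"),                      # downloads are cached here
)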
def build_text(annotations: List[Annotation]) -> str
Combines all the text from a list of annotations, ordered from earliest start time.
Parameters
annotations: The list of annotations to combine.
Returns
The combined transcripts.
Expand source code
def build_text(annotations: List[Annotation]) -> str:
    """Combines all the text from a list of annotations, ordered from earliest start time.

    Parameters:
        annotations: The list of annotations to combine.

    Returns:
        The combined transcripts.
    """
    annotations = sorted(
        annotations,
        key=(
            lambda annotation: annotation.start_ms
            if annotation.start_ms is not None
            else 0
        ),
    )
    return " ".join(annotation.transcript for annotation in annotations)
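A short sketch, assuming `annotations` came from transcribe() and the output path is a placeholder. Input order does not matter, since the function sorts by start_ms (treating a missing start time as 0) before joining:

transcript = build_text(annotations)
with open("transcript.txt", "w") as f:
    f.write(transcript)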
def transcribe(audio: pathlib.Path, asr: transformers.pipelines.automatic_speech_recognition.AutomaticSpeechRecognitionPipeline, chunk_length_s=10) -> List[Annotation]
Transcribes the given audio and gives back the resulting annotations.
Parameters
audio: The path to the audio file to transcribe.
asr: The automatic speech recognition pipeline.
chunk_length_s: The number of seconds per audio chunk in the pipeline.
Returns
A list of the inferred annotations in the given audio.
Expand source code
def transcribe(audio: Path, asr: ASRPipeline, chunk_length_s=10) -> List[Annotation]:
    """Transcribes the given audio and gives back the resulting annotations.

    Parameters:
        audio: The path to the audio file to transcribe.
        asr: The automatic speech recognition pipeline.
        chunk_length_s: The number of seconds per audio chunk in the pipeline.

    Returns:
        A list of the inferred annotations in the given audio.
    """
    preds: Dict[str, Any] = asr(str(audio), chunk_length_s=chunk_length_s, return_timestamps="word")  # type: ignore

    chunks = preds["chunks"]
    return list(map(lambda chunk: annotation_from_chunk(chunk, audio), chunks))
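A usage sketch, assuming `asr` was created with build_pipeline() and the audio path is a placeholder. Because the pipeline is run with return_timestamps="word", each returned annotation corresponds to a single word with millisecond timings:

from pathlib import Path

annotations = transcribe(Path("interview.wav"), asr, chunk_length_s=30)
for annotation in annotations:
    print(annotation.start_ms, annotation.stop_ms, annotation.transcript)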