Module elpis.transcriber

Source code
from elpis.transcriber.results import build_elan, build_text
from elpis.transcriber.transcribe import build_pipeline, transcribe

__all__ = ["build_elan", "build_text", "build_pipeline", "transcribe"]

Sub-modules

elpis.transcriber.results
elpis.transcriber.transcribe

Functions

def build_elan(annotations: List[Annotation], tier_name: str = 'Phrase') ‑> pympi.Elan.Eaf

Builds an ELAN file from the given annotations and tier name.

Parameters

annotations: The list of annotations to add.
tier_name: The name of the tier to add the annotations under.

Returns

The resulting ELAN file.

Source code
def build_elan(annotations: List[Annotation], tier_name: str = "Phrase") -> Eaf:
    """Builds an elan file from the given annotations and tier_name.

    Parameters:
        annotations: The list of annotations to add.
        tier_name: The name of the tier to add the annotations under.

    Returns:
        The resulting ELAN file.
    """
    result = Eaf()
    result.add_tier(tier_name)
    for annotation in annotations:
        result.add_annotation(
            id_tier=tier_name,
            start=annotation.start_ms,
            end=annotation.stop_ms,
            value=annotation.transcript,
        )
    return result
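
A minimal usage sketch, assuming Annotation lives at elpis.models and accepts
the keyword arguments below (only the start_ms, stop_ms and transcript fields
are confirmed by the source above):

from elpis.models import Annotation  # assumed import path
from elpis.transcriber.results import build_elan

# Hypothetical annotations for illustration.
annotations = [
    Annotation(transcript="hello", start_ms=0, stop_ms=480),
    Annotation(transcript="world", start_ms=480, stop_ms=900),
]

eaf = build_elan(annotations, tier_name="Phrase")
eaf.to_file("transcript.eaf")  # pympi's Eaf serialises itself to an .eaf file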
def build_pipeline(pretrained_location: str, processor_location: Optional[str] = None, cache_dir: Optional[pathlib.Path] = None) ‑> transformers.pipelines.automatic_speech_recognition.AutomaticSpeechRecognitionPipeline

Builds the pipeline from the supplied pretrained location.

Parameters

pretrained_location: A Hugging Face model name, or local path to the pretrained model.
processor_location: An optional location for the processor; defaults to pretrained_location when omitted.
cache_dir: The directory in which to store temporary files.

Returns

A pipeline to be used for automatic speech recognition (ASR).

Source code
def build_pipeline(
    pretrained_location: str,
    processor_location: Optional[str] = None,
    cache_dir: Optional[Path] = None,
) -> ASRPipeline:
    """Builds the pipeline from the supplied pretrained location.

    Parameters:
        pretrained_location: A Hugging Face model name, or local path to the
            pretrained model.
        processor_location: An optional location for the processor; defaults
            to pretrained_location when omitted.
        cache_dir: The directory in which to store temporary files.

    Returns:
        A pipeline to be used for automatic speech recognition (ASR).
    """
    if processor_location is None:
        processor_location = pretrained_location

    processor = AutoProcessor.from_pretrained(processor_location, cache_dir=cache_dir)
    model = AutoModelForCTC.from_pretrained(
        pretrained_location,
        cache_dir=cache_dir,
        pad_token_id=processor.tokenizer.pad_token_id,
    )

    return pipeline(
        task=TASK,
        model=model,
        tokenizer=processor.tokenizer,
        feature_extractor=processor.feature_extractor,
    )  # type: ignore
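
A usage sketch; the checkpoint name below is only an example of a CTC model on
the Hugging Face Hub, not a recommendation:

from pathlib import Path
from elpis.transcriber.transcribe import build_pipeline

# Example Hugging Face model id; a local path to a pretrained model also works.
asr = build_pipeline(
    "facebook/wav2vec2-base-960h",
    cache_dir=Path("/tmp/elpis-cache"),  # optional; omit for the default cache
)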
def build_text(annotations: List[Annotation]) ‑> str

Combines all the text from a list of annotations, ordered by start time (earliest first).

Parameters

annotations: The list of annotations to combine.

Returns

The combined transcripts.

Source code
def build_text(annotations: List[Annotation]) -> str:
    """Combines all the text from a list of annotations, ordered from earliest
    start time.

    Parameters:
        annotations: The list of annotations to combine.

    Returns:
        The combined transcripts.
    """
    annotations = sorted(
        annotations,
        key=(
            lambda annotation: annotation.start_ms
            if annotation.start_ms is not None
            else 0
        ),
    )
    return " ".join(annotation.transcript for annotation in annotations)
def transcribe(audio: pathlib.Path, asr: transformers.pipelines.automatic_speech_recognition.AutomaticSpeechRecognitionPipeline, chunk_length_s=10) ‑> List[Annotation]

Transcribes the given audio and returns the resulting annotations.

Parameters

audio: The path to the audio file to transcribe.
asr: The automatic speech recognition pipeline.
chunk_length_s: The number of seconds per audio chunk in the pipeline.

Returns

A list of the inferred annotations in the given audio.

Source code
def transcribe(audio: Path, asr: ASRPipeline, chunk_length_s=10) -> List[Annotation]:
    """Transcribes the given audio and gives back the resulting annotations.

    Parameters:
        audio: The path to the audio file to transcribe.
        asr: The automatic speech recognition pipeline.
        chunk_length_s: The number of seconds per audio chunk in the pipeline.

    Returns:
        A list of the inferred annotations in the given audio.
    """
    preds: Dict[str, Any] = asr(str(audio), chunk_length_s=chunk_length_s, return_timestamps="word")  # type: ignore
    chunks = preds["chunks"]

    return list(map(lambda chunk: annotation_from_chunk(chunk, audio), chunks))
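
Putting the module together, a sketch of an end-to-end run (the model id and
file paths are illustrative). Note that transcribe() requests word-level
timestamps, so each returned Annotation covers a single word:

from pathlib import Path

from elpis.transcriber.results import build_elan, build_text
from elpis.transcriber.transcribe import build_pipeline, transcribe

asr = build_pipeline("facebook/wav2vec2-base-960h")  # example model id
annotations = transcribe(Path("recording.wav"), asr, chunk_length_s=10)

print(build_text(annotations))  # plain-text transcript, ordered by start time
build_elan(annotations).to_file("recording.eaf")  # one annotation per word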