"""Import subtitles from various file formats."""
from .subtitle import Subtitle
from .subtitles import Subtitles
from . import formats
import srt
import aeidon
import chardet
from pathlib import Path
def detect_encoding(file):
    """
    Guesses the text encoding of the given `file`.

    `file` must offer an `open()` method that accepts a mode string, as
    `pathlib.Path` objects do. Its content is read in binary mode and fed
    line by line to the universal detector of the [Chardet][chardet]
    library, stopping early once the detector signals that it is done.

    Returns the encoding name the detector reports, except that the
    spellings `'utf-8'` and `'UTF-8-SIG'` are replaced by `'UTF-8'` and
    `'UTF-8-sig'`, respectively. Any other result is passed through
    unchanged.

    [chardet]: https://pypi.org/project/chardet
    """
    # Spellings that are replaced before the result is returned.
    normalized = {
        'utf-8': 'UTF-8',
        'UTF-8-SIG': 'UTF-8-sig',
    }
    sniffer = chardet.universaldetector.UniversalDetector()
    with file.open('rb') as binary:
        for chunk in binary:
            sniffer.feed(chunk)
            if sniffer.done:
                # The detector has seen enough; skip the rest of the file.
                break
    sniffer.close()
    guess = sniffer.result['encoding']
    return normalized.get(guess, guess)
def from_aeidon(aeidon_subtitles, markup):
    """
    Convert subtitles from Aeidon's format to the internal one.

    Each item of `aeidon_subtitles` becomes a new `Subtitle`: its
    `main_text` is passed through `markup.convert()`, and its
    `start_seconds` and `end_seconds` are scaled to milliseconds and
    rounded with `round()`. Returns the new subtitles as a list, in the
    order of the input.
    """
    # NOTE(review): `read_srt()` constructs `Subtitle(lines, start, duration)`,
    # whereas this code uses a no-argument constructor and assigns `text`,
    # `start` and `end`. Confirm that `Subtitle` supports both usages.
    def to_internal(source):
        converted = Subtitle()
        converted.text = markup.convert(source.main_text)
        converted.start = round(1000 * source.start_seconds)
        converted.end = round(1000 * source.end_seconds)
        return converted

    return [to_internal(source) for source in aeidon_subtitles]
def read_srt(file, encoding):
    """
    Reader for SubRip (.srt) subtitle files.

    Reads the text of `file` using the given `encoding` and parses it with
    the `srt` library. Every parsed entry is turned into a `Subtitle` made
    of its text lines, its start time in milliseconds, and its duration in
    milliseconds (end minus start, each rounded with `round()`).

    Returns a `Subtitles` object built from these subtitles, with `file`
    and `encoding` passed along as source information.
    """
    def milliseconds(delta):
        # `delta` is expected to offer `total_seconds()`, like `timedelta`.
        return round(delta.total_seconds() * 1000)

    def to_internal(entry):
        lines = entry.content.splitlines()
        start = milliseconds(entry.start)
        end = milliseconds(entry.end)
        return Subtitle(lines, start, end - start)

    parsed = srt.parse(file.read_text(encoding=encoding))
    converted = [to_internal(entry) for entry in parsed]
    return Subtitles(converted, {'file': file, 'encoding': encoding})
def read_ass(file, encoding):
    """
    Reader for Advanced Substation Alpha (.ass) subtitle files.

    Reads `file` with Aeidon's `AdvSubStationAlpha` file class, converts
    the subtitles via `from_aeidon()` with a markup converter from ASS to
    SubRip, and returns them as a `Subtitles` object, with `file` and
    `encoding` passed along as source information.
    """
    parser = aeidon.files.AdvSubStationAlpha(file, encoding)
    to_subrip = aeidon.MarkupConverter(
        aeidon.formats.ASS,
        aeidon.formats.SUBRIP,
    )
    converted = from_aeidon(parser.read(), to_subrip)
    return Subtitles(converted, {'file': file, 'encoding': encoding})
def read_ssa(file, encoding):
    """
    Reader for Substation Alpha (.ssa) subtitle files.

    Reads `file` with Aeidon's `SubStationAlpha` file class, converts the
    subtitles via `from_aeidon()` with a markup converter from SSA to
    SubRip, and returns them as a `Subtitles` object, with `file` and
    `encoding` passed along as source information.
    """
    parser = aeidon.files.SubStationAlpha(file, encoding)
    to_subrip = aeidon.MarkupConverter(
        aeidon.formats.SSA,
        aeidon.formats.SUBRIP,
    )
    converted = from_aeidon(parser.read(), to_subrip)
    return Subtitles(converted, {'file': file, 'encoding': encoding})
def read_vtt(file, encoding):
    """
    Reader for Web Video Text Tracks (.vtt) subtitle files.

    Reads `file` with Aeidon's `WebVTT` file class, converts the subtitles
    via `from_aeidon()` with a markup converter from WebVTT to SubRip, and
    returns them as a `Subtitles` object, with `file` and `encoding`
    passed along as source information.
    """
    parser = aeidon.files.WebVTT(file, encoding)
    to_subrip = aeidon.MarkupConverter(
        aeidon.formats.WEBVTT,
        aeidon.formats.SUBRIP,
    )
    converted = from_aeidon(parser.read(), to_subrip)
    return Subtitles(converted, {'file': file, 'encoding': encoding})
def read_sub(file, encoding):
    """
    Reader for SubViewer 2.0 (.sub) subtitle files.

    Reads `file` with Aeidon's `SubViewer2` file class, converts the
    subtitles via `from_aeidon()` with a markup converter from SubViewer 2
    to SubRip, and returns them as a `Subtitles` object, with `file` and
    `encoding` passed along as source information.
    """
    parser = aeidon.files.SubViewer2(file, encoding)
    to_subrip = aeidon.MarkupConverter(
        aeidon.formats.SUBVIEWER2,
        aeidon.formats.SUBRIP,
    )
    converted = from_aeidon(parser.read(), to_subrip)
    return Subtitles(converted, {'file': file, 'encoding': encoding})
# Dispatch table consulted by `load()`: maps the name of a subtitle format
# to the reader function that parses files of that format.
readers = {
    'SubRip': read_srt,
    'ASS': read_ass,
    'SSA': read_ssa,
    'WebVTT': read_vtt,
    'SubViewer': read_sub,
}
def load(file, format=None, encoding=None):
    """
    Loads subtitles from a file.

    `file` is preferably a `pathlib.Path` object, but may also be a
    string denoting an absolute or relative file path.

    The file `format`, if not explicitly specified, is deduced from the
    file ending. The file's text `encoding` can be given, but is otherwise
    detected automatically (which may fail in some rare cases).

    Returns a `Subtitles` object.

    Raises `ValueError` if the format has to be deduced and the file
    ending is unknown, or if no reader exists for the format.
    """
    file = Path(file)
    if format is None:
        # The lookup is case-insensitive with respect to the file ending.
        suffix = file.suffix.lower()
        if suffix not in formats:
            raise ValueError(f'Unknown file ending "{suffix}".')
        format = formats[suffix]
    if format not in readers:
        raise ValueError(f'No reader for subtitles format "{format}".')
    if not encoding:
        # Covers `None` as well as an empty string.
        encoding = detect_encoding(file)
    return readers[format](file, encoding)
# Second public name bound to the same function object as `load`.
read = load
"""Alias for `load()`."""