[crunchyroll] Extract subtitles extraction routine
This commit is contained in:
parent
e26be70bca
commit
0385d64223
1 changed file with 13 additions and 17 deletions
|
@ -76,8 +76,8 @@ class CrunchyrollIE(InfoExtractor):
|
||||||
self._login()
|
self._login()
|
||||||
|
|
||||||
def _decrypt_subtitles(self, data, iv, id):
|
def _decrypt_subtitles(self, data, iv, id):
|
||||||
data = bytes_to_intlist(data)
|
data = bytes_to_intlist(base64.b64decode(data))
|
||||||
iv = bytes_to_intlist(iv)
|
iv = bytes_to_intlist(base64.b64decode(iv))
|
||||||
id = int(id)
|
id = int(id)
|
||||||
|
|
||||||
def obfuscate_key_aux(count, modulo, start):
|
def obfuscate_key_aux(count, modulo, start):
|
||||||
|
@ -179,6 +179,16 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||||
|
|
||||||
return output
|
return output
|
||||||
|
|
||||||
|
def _extract_subtitles(self, subtitle):
    """Return the available subtitle formats for one subtitle document.

    ``subtitle`` is parsed as XML once, and the resulting root element is
    handed to both converters.  The result is a two-element list of dicts,
    SRT first and ASS second, each holding an ``'ext'`` and a ``'data'`` key.
    """
    tree = xml.etree.ElementTree.fromstring(subtitle)
    # Run the converters in the same order the formats are listed: SRT, ASS.
    srt_entry = {'ext': 'srt', 'data': self._convert_subtitles_to_srt(tree)}
    ass_entry = {'ext': 'ass', 'data': self._convert_subtitles_to_ass(tree)}
    return [srt_entry, ass_entry]
|
||||||
|
|
||||||
def _get_subtitles(self, video_id, webpage):
|
def _get_subtitles(self, video_id, webpage):
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage):
|
for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage):
|
||||||
|
@ -190,25 +200,11 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||||
data = self._search_regex(r'<data>([^<]+)', sub_page, 'subtitle_data', fatal=False)
|
data = self._search_regex(r'<data>([^<]+)', sub_page, 'subtitle_data', fatal=False)
|
||||||
if not id or not iv or not data:
|
if not id or not iv or not data:
|
||||||
continue
|
continue
|
||||||
id = int(id)
|
|
||||||
iv = base64.b64decode(iv)
|
|
||||||
data = base64.b64decode(data)
|
|
||||||
|
|
||||||
subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8')
|
subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8')
|
||||||
lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False)
|
lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False)
|
||||||
if not lang_code:
|
if not lang_code:
|
||||||
continue
|
continue
|
||||||
sub_root = xml.etree.ElementTree.fromstring(subtitle)
|
subtitles[lang_code] = self._extract_subtitles(subtitle)
|
||||||
subtitles[lang_code] = [
|
|
||||||
{
|
|
||||||
'ext': 'srt',
|
|
||||||
'data': self._convert_subtitles_to_srt(sub_root),
|
|
||||||
},
|
|
||||||
{
|
|
||||||
'ext': 'ass',
|
|
||||||
'data': self._convert_subtitles_to_ass(sub_root),
|
|
||||||
},
|
|
||||||
]
|
|
||||||
return subtitles
|
return subtitles
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
|
Loading…
Reference in a new issue