# Build a lookup from the English part of each bilingual subtitle row to the
# full bilingual text of that row.
# NOTE(review): 59 presumably skips the fixed-width leading fields of each
# subtitle row (timing/style columns) - confirm against the input file.
text_offset = 59

dirt_china = {}
for i in range(len(chi_ass)):
    # Flatten the row into one space-separated string, then drop the prefix.
    bilingual = " ".join("%s" % value for value in chi_ass.iloc[i])[text_offset:]
    # The key is whatever follows the first "}" (e.g. after "\N{\ren}").
    # When there is no "}", find() returns -1, so the slice starts at 0 and
    # the whole text becomes the key.
    english = bilingual[bilingual.find("}") + 1:]
    dirt_china[english] = bilingual
dirt_china
1 2 3 4 5 6 7 8
{'Lights out!': '熄灯!\\N{\\ren}Lights out!', 'Cletus, are you there?': '克莱图斯,你在吗?\\N{\\ren}Cletus, are you there?', 'My angel.': '我的天使\\N{\\ren}My angel.', 'This is for you.': '这个给你\\N{\\ren}This is for you.', 'What did the doctor say?': '医生怎么说?\\N{\\ren}What did the doctor say?', 'He said the mutations are advancing.': '他说基因突变正在恶化\\N{\\ren}He said the mutations are advancing.', 'My powers are too strong.': '我的力量太强大了\\N{\\ren}My powers are too strong.', "Cletus, I'm scared.": "克莱图斯,我很害怕\\N{\\ren}Cletus, I'm scared.",
进行匹配之后发现,英文字幕中有些句子被拆成了多行,例如
1
I love you,ok
会变成
1
I love you,
1
ok
但字典中的 key 是完整的一句话,所以需要把这些被拆开的字幕合并到同一行
1 2 3 4 5 6 7 8 9 10
### Merge sentences that were split over several rows into a single row.
# A row is treated as subtitle text when its first character is a letter.
# Every run of consecutive text rows is collapsed into the first row of the
# run; the remaining rows of the run are overwritten with a placeholder.
# The run's first row is tracked explicitly because the placeholder itself
# starts with a letter: comparing only against the previous row would glue
# the third and later pieces of a sentence onto the placeholder.
anchor = None      # position of the row collecting the current sentence
anchor_text = ""   # text accumulated for that row so far
for i in range(len(eng_ass)):
    text = " ".join("%s" % value for value in eng_ass.iloc[i])
    # text[:1] instead of text[0] so an empty row does not raise IndexError.
    if not text[:1].isalpha():
        anchor = None
        continue
    if anchor is None:
        anchor, anchor_text = i, text
        continue
    anchor_text = anchor_text + " " + text
    eng_ass.iloc[anchor] = anchor_text
    eng_ass.iloc[i] = "buxuyaodehang"
## After saving, remove the "buxuyaodehang" rows from the file by hand.
eng_ass.to_csv("eng_ass_temp.txt", index=False)