Qt 按不同编码格式读取文本文件(ANSI/GBK、UTF-8、UTF-8 BOM、UTF-16LE、UTF-16BE)
/// Text encodings distinguished by the file-encoding detection in this file.
/// The underlying type is int and the enumerators are numbered from 0.
enum class EncodingFormat : int
{
    ANSI    = 0, ///< Legacy multi-byte encoding without a BOM (GBK here).
    UTF16LE = 1, ///< UTF-16, little-endian.
    UTF16BE = 2, ///< UTF-16, big-endian.
    UTF8    = 3, ///< UTF-8 without a byte-order mark.
    UTF8BOM = 4, ///< UTF-8 with a leading byte-order mark.
};
/**
 * @brief Guesses the text encoding of a file from its leading bytes.
 *
 * A byte-order mark is checked first (UTF-16LE, UTF-16BE, UTF-8 with BOM).
 * When no BOM is present, a sample of the file is decoded as UTF-8: if the
 * decoder reports invalid sequences the file is classified as ANSI,
 * otherwise as UTF-8.
 *
 * @param fileName Path of the file to inspect.
 * @return The detected encoding. EncodingFormat::UTF8 is returned when the
 *         file cannot be opened or is empty.
 */
EncodingFormat VideoPlayer::FileCharacterEncoding(const QString &fileName)
{
    // UTF-8 is the assumed default.
    EncodingFormat code = EncodingFormat::UTF8;

    QFile file(fileName);
    if (!file.open(QIODevice::ReadOnly))
    {
        return code;
    }

    // Three bytes are enough for the BOM checks, but not for the UTF-8
    // heuristic: text files frequently begin with plain ASCII, which is valid
    // in both UTF-8 and ANSI. Sample a larger (still bounded) prefix instead.
    const qint64 sampleLimit = 64 * 1024;
    const QByteArray buffer = file.read(sampleLimit);
    const bool wholeFileSampled = file.atEnd();
    file.close();

    if (buffer.isEmpty())
    {
        return code;
    }

    // startsWith() is bounds-safe, so files shorter than a BOM are handled.
    if (buffer.startsWith("\xFF\xFE"))
    {
        code = EncodingFormat::UTF16LE;
    }
    else if (buffer.startsWith("\xFE\xFF"))
    {
        code = EncodingFormat::UTF16BE;
    }
    else if (buffer.startsWith("\xEF\xBB\xBF"))
    {
        code = EncodingFormat::UTF8BOM;
    }
    else
    {
        // Try to decode as UTF-8; any invalid sequence means the data is
        // treated as ANSI.
        QTextCodec *tc = QTextCodec::codecForName("utf-8");
        if (tc != nullptr)
        {
            QTextCodec::ConverterState cs;
            tc->toUnicode(buffer.constData(), buffer.size(), &cs);

            // A multi-byte sequence cut off at the end of the sample is
            // reported through remainingChars. It only counts as an error
            // when the sample reached the real end of the file.
            const bool truncatedAtEof = wholeFileSampled && cs.remainingChars > 0;
            code = (cs.invalidChars > 0 || truncatedAtEof) ? EncodingFormat::ANSI
                                                            : EncodingFormat::UTF8;
        }
    }
    return code;
}
上面的代码用于检测文件的编码格式,下面的代码按检测到的编码读取文件内容:
QString srtfile = "D://test.srt";
EncodingFormat code = FileCharacterEncoding(srtfile);
QFile file(srtfile);
if(!file.open(QIODevice::ReadOnly)) {
qDebug()<<"未找到外挂字幕文件:"<<srtfile<<endl;
}
m_SrtInfoLst.clear();
QTextCodec::ConverterState state;
QTextCodec *codec = QTextCodec::codecForName("UTF-8");
if(code==EncodingFormat::UTF16LE)
{
codec = QTextCodec::codecForName("UTF-16LE");
}
else if(code==EncodingFormat::UTF8)
{
codec = QTextCodec::codecForName("UTF-8");
}
else if(code==EncodingFormat::UTF8BOM)
{
codec = QTextCodec::codecForName("UTF-8");
}
else if(code==EncodingFormat::UTF16BE)
{
codec = QTextCodec::codecForName("UTF-16BE");
}
QTextStream stream_src(&file);
stream_src.setCodec(codec);//这里就是按照对应格式解析出来了
while(!stream_src.atEnd()) {
QString str = stream_src.readLine();//一行一行获取内容
//TODO:......
}
file.close();