-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
7 changed files
with
328 additions
and
75 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
#include "Common.h" | ||
|
||
namespace G2pTest { | ||
/// Reads a text file and returns its content split into lines.
///
/// @param filename Path of the file to read.
/// @return The lines of the file in reading order, or an empty list when the
///         file cannot be opened for reading.
QStringList readData(const QString &filename) {
    QStringList lines;

    QFile source(filename);
    if (!source.open(QIODevice::ReadOnly | QIODevice::Text)) {
        return lines;
    }

    QTextStream reader(&source);
    while (!reader.atEnd()) {
        lines << reader.readLine();
    }

    // No explicit close(): QFile closes the handle when `source` goes out of scope.
    return lines;
}
} // G2pTest |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
#ifndef DATASET_TOOLS_COMMON_H | ||
#define DATASET_TOOLS_COMMON_H | ||
|
||
#include <QFile> | ||
#include <QStringList> | ||
#include <QTextStream> | ||
|
||
namespace G2pTest { | ||
|
||
/// Reads the text file at `filename` and returns its lines in reading order. | ||
/// The returned list is empty when the file cannot be opened. | ||
QStringList readData(const QString &filename); | ||
|
||
} // G2pTest | ||
|
||
#endif // DATASET_TOOLS_COMMON_H |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,116 @@ | ||
#include "JyuptingTest.h" | ||
#include "Common.h" | ||
|
||
#include <QDebug> | ||
#include <QTextCodec> | ||
|
||
|
||
namespace G2pTest { | ||
/// Builds the fixture around a ZhG2p converter constructed with "cantonese".
JyuptingTest::JyuptingTest()
    : g2p_can(new IKg2p::ZhG2p("cantonese")) {}
|
||
/// Nothing to clean up by hand; members release themselves.
JyuptingTest::~JyuptingTest() = default;
|
||
bool JyuptingTest::convertNumTest() { | ||
QString raw1 = "明月@1几32时有##一"; | ||
QString tar1 = "ming jyut jat gei saam ji si jau jat"; | ||
QString res1 = g2p_can->convert(raw1, false, true); | ||
if (res1 != tar1) { | ||
qDebug() << "raw1:" << raw1; | ||
qDebug() << "tar1:" << tar1; | ||
qDebug() << "res1:" << res1; | ||
return false; | ||
} | ||
|
||
QString raw2 = "明月@1几32时有##一"; | ||
QString tar2 = "ming jyut 1 gei 3 2 si jau jat"; | ||
QString res2 = g2p_can->convert(raw1, false, false); | ||
if (res2 != tar2) { | ||
qDebug() << "raw2:" << raw2; | ||
qDebug() << "tar2:" << tar2; | ||
qDebug() << "res2:" << res2; | ||
return false; | ||
} | ||
|
||
qDebug() << "\nconvertNumTest: success"; | ||
return true; | ||
} | ||
|
||
bool JyuptingTest::removeToneTest() { | ||
QString raw1 = "明月@1几32时有##一"; | ||
QString tar1 = "ming4 jyut6 jat1 gei2 saam1 ji6 si4 jau5 jat1"; | ||
QString res1 = g2p_can->convert(raw1, true, true); | ||
if (res1 != tar1) { | ||
qDebug() << "raw1:" << raw1; | ||
qDebug() << "tar1:" << tar1; | ||
qDebug() << "res1:" << res1; | ||
return false; | ||
} | ||
|
||
QString raw2 = "明月@1几32时有##一"; | ||
QString tar2 = "ming4 jyut6 1 gei2 3 2 si4 jau5 jat1"; | ||
QString res2 = g2p_can->convert(raw1, true, false); | ||
if (res2 != tar2) { | ||
qDebug() << "raw2:" << raw2; | ||
qDebug() << "tar2:" << tar2; | ||
qDebug() << "res2:" << res2; | ||
return false; | ||
} | ||
|
||
qDebug() << "\nremoveToneTest: success"; | ||
return true; | ||
} | ||
bool JyuptingTest::batchTest(bool resDisplay) { | ||
int count = 0; | ||
int error = 0; | ||
|
||
QTextCodec *utf8 = QTextCodec::codecForName("UTF-8"); | ||
QStringList dataLines; | ||
dataLines = readData(R"(D:\projects\dataset-tools\op_lab.txt)"); | ||
foreach (const QString &line, dataLines) { | ||
|
||
QString trimmedLine = utf8->toUnicode(line.toLocal8Bit()).trimmed(); | ||
QStringList keyValuePair = trimmedLine.split('|'); | ||
|
||
if (keyValuePair.size() == 2) { | ||
QString key = keyValuePair[0]; | ||
|
||
QString value = keyValuePair[1]; | ||
QString result = g2p_can->convert(key, false, true); | ||
|
||
QStringList words = value.split(" "); | ||
int wordSize = words.size(); | ||
count += wordSize; | ||
|
||
bool diff = false; | ||
QStringList resWords = result.split(" "); | ||
for (int i = 0; i < wordSize; i++) { | ||
if (words[i] != resWords[i] && !words[i].split("/").contains(resWords[i])) { | ||
diff = true; | ||
error++; | ||
} | ||
} | ||
|
||
if (resDisplay && diff) { | ||
qDebug() << "text: " << key; | ||
qDebug() << "raw: " << value; | ||
qDebug() << "res: " << result; | ||
} | ||
} else { | ||
qDebug() << keyValuePair; | ||
} | ||
} | ||
|
||
double percentage = ((double) error / (double) count) * 100.0; | ||
|
||
qDebug() << "\n--------------------"; | ||
qDebug() << "batchTest: success"; | ||
qDebug() << "错误率: " << percentage << "%"; | ||
qDebug() << "错误数: " << error; | ||
qDebug() << "总字数: " << count; | ||
qDebug() << "--------------------"; | ||
return true; | ||
} | ||
|
||
} // G2pTest |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
#ifndef DATASET_TOOLS_JYUPTINGTEST_H | ||
#define DATASET_TOOLS_JYUPTINGTEST_H | ||
|
||
#include "g2pglobal.h" | ||
#include "zhg2p.h" | ||
|
||
namespace G2pTest { | ||
|
||
/// Self-checking tests for an IKg2p::ZhG2p converter; each test reports through qDebug(). | ||
class JyuptingTest { | ||
public: | ||
explicit JyuptingTest(); | ||
~JyuptingTest(); | ||
/// Converts a fixed sentence with the third convert() argument true and then false; returns false on the first mismatch. | ||
bool convertNumTest(); | ||
/// Converts a fixed sentence with the second convert() argument true; returns false on the first mismatch. | ||
bool removeToneTest(); | ||
/// Converts every entry of a data file and prints error statistics; resDisplay also prints the mismatching entries. | ||
bool batchTest(bool resDisplay = false); | ||
|
||
private: | ||
/// Converter under test, owned by this fixture. | ||
QScopedPointer<IKg2p::ZhG2p> g2p_can; | ||
}; | ||
|
||
} // G2pTest | ||
|
||
#endif // DATASET_TOOLS_JYUPTINGTEST_H |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
#include "ManTest.h" | ||
#include "Common.h" | ||
|
||
#include <QDebug> | ||
#include <QTextCodec> | ||
|
||
namespace G2pTest { | ||
/// Builds the fixture around a ZhG2p converter constructed with "mandarin".
ManTest::ManTest()
    : g2p_zh(new IKg2p::ZhG2p("mandarin")) {}
|
||
/// Nothing to clean up by hand; members release themselves.
ManTest::~ManTest() = default;
|
||
bool ManTest::convertNumTest() { | ||
QString raw1 = "明月@1几32时有##一"; | ||
QString tar1 = "ming yue yi ji san er shi you yi"; | ||
QString res1 = g2p_zh->convert(raw1, false, true); | ||
if (res1 != tar1) { | ||
qDebug() << "raw1:" << raw1; | ||
qDebug() << "tar1:" << tar1; | ||
qDebug() << "res1:" << res1; | ||
return false; | ||
} | ||
|
||
QString raw2 = "明月@1几32时有##一"; | ||
QString tar2 = "ming yue 1 ji 3 2 shi you yi"; | ||
QString res2 = g2p_zh->convert(raw1, false, false); | ||
if (res2 != tar2) { | ||
qDebug() << "raw2:" << raw2; | ||
qDebug() << "tar2:" << tar2; | ||
qDebug() << "res2:" << res2; | ||
return false; | ||
} | ||
|
||
qDebug() << "\nconvertNumTest: success"; | ||
return true; | ||
} | ||
|
||
bool ManTest::removeToneTest() { | ||
QString raw1 = "明月@1几32时有##一"; | ||
QString tar1 = "ming2 yue4 yi1 ji3 san1 er4 shi2 you3 yi1"; | ||
QString res1 = g2p_zh->convert(raw1, true, true); | ||
if (res1 != tar1) { | ||
qDebug() << "raw1:" << raw1; | ||
qDebug() << "tar1:" << tar1; | ||
qDebug() << "res1:" << res1; | ||
return false; | ||
} | ||
|
||
QString raw2 = "明月@1几32时有##一"; | ||
QString tar2 = "ming2 yue4 1 ji3 3 2 shi2 you3 yi1"; | ||
QString res2 = g2p_zh->convert(raw1, true, false); | ||
if (res2 != tar2) { | ||
qDebug() << "raw2:" << raw2; | ||
qDebug() << "tar2:" << tar2; | ||
qDebug() << "res2:" << res2; | ||
return false; | ||
} | ||
|
||
qDebug() << "\nremoveToneTest: success"; | ||
return true; | ||
} | ||
bool ManTest::batchTest(bool resDisplay) { | ||
int count = 0; | ||
int error = 0; | ||
|
||
QTextCodec *utf8 = QTextCodec::codecForName("UTF-8"); | ||
QStringList dataLines; | ||
dataLines = readData(R"(D:\projects\dataset-tools\op_lab.txt)"); | ||
foreach (const QString &line, dataLines) { | ||
|
||
QString trimmedLine = utf8->toUnicode(line.toLocal8Bit()).trimmed(); | ||
QStringList keyValuePair = trimmedLine.split('|'); | ||
|
||
if (keyValuePair.size() == 2) { | ||
QString key = keyValuePair[0]; | ||
|
||
QString value = keyValuePair[1]; | ||
QString result = g2p_zh->convert(key, false, true); | ||
|
||
QStringList words = value.split(" "); | ||
int wordSize = words.size(); | ||
count += wordSize; | ||
|
||
bool diff = false; | ||
QStringList resWords = result.split(" "); | ||
for (int i = 0; i < wordSize; i++) { | ||
if (words[i] != resWords[i] && !words[i].split("/").contains(resWords[i])) { | ||
diff = true; | ||
error++; | ||
} | ||
} | ||
|
||
if (resDisplay && diff) { | ||
qDebug() << "text: " << key; | ||
qDebug() << "raw: " << value; | ||
qDebug() << "res: " << result; | ||
} | ||
} else { | ||
qDebug() << keyValuePair; | ||
} | ||
} | ||
|
||
double percentage = ((double) error / (double) count) * 100.0; | ||
|
||
qDebug() << "\n--------------------"; | ||
qDebug() << "batchTest: success"; | ||
qDebug() << "错误率: " << percentage << "%"; | ||
qDebug() << "错误数: " << error; | ||
qDebug() << "总字数: " << count; | ||
qDebug() << "--------------------"; | ||
return true; | ||
} | ||
|
||
} // G2pTest |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
#ifndef DATASET_TOOLS_MANTEST_H | ||
#define DATASET_TOOLS_MANTEST_H | ||
|
||
#include "g2pglobal.h" | ||
#include "zhg2p.h" | ||
|
||
namespace G2pTest { | ||
|
||
/// Self-checking tests for an IKg2p::ZhG2p converter; each test reports through qDebug(). | ||
class ManTest { | ||
public: | ||
explicit ManTest(); | ||
~ManTest(); | ||
/// Converts a fixed sentence with the third convert() argument true and then false; returns false on the first mismatch. | ||
bool convertNumTest(); | ||
/// Converts a fixed sentence with the second convert() argument true; returns false on the first mismatch. | ||
bool removeToneTest(); | ||
/// Converts every entry of a data file and prints error statistics; resDisplay also prints the mismatching entries. | ||
bool batchTest(bool resDisplay = false); | ||
|
||
private: | ||
/// Converter under test, owned by this fixture. | ||
QScopedPointer<IKg2p::ZhG2p> g2p_zh; | ||
}; | ||
|
||
} // G2pTest | ||
|
||
#endif // DATASET_TOOLS_MANTEST_H |
Oops, something went wrong.