G2pTest: split the test driver into ManTest / JyuptingTest plus a shared readData() helper.

Review notes (text before the first "diff --git" line is ignored by patch tools):
- Line breaks and indentation were reconstructed from a whitespace-collapsed copy of this patch.
- Bare "#include" lines in the main.cpp hunk lost their targets in that copy and are kept as found;
  the blank context lines of that hunk are a best-effort reconstruction.
- "index" hashes are carried over from the original patch and do not match the edited content.

diff --git a/src/tests/G2pTest/Common.cpp b/src/tests/G2pTest/Common.cpp
new file mode 100644
index 0000000..31b5f13
--- /dev/null
+++ b/src/tests/G2pTest/Common.cpp
@@ -0,0 +1,17 @@
+#include "Common.h"
+
+namespace G2pTest {
+    // Missing or unreadable files yield an empty list rather than an error.
+    QStringList readData(const QString &filename) {
+        QStringList dataLines;
+        QFile file(filename);
+        if (file.open(QIODevice::ReadOnly | QIODevice::Text)) {
+            QTextStream in(&file);
+            while (!in.atEnd()) {
+                dataLines.append(in.readLine());
+            }
+            file.close();
+        }
+        return dataLines;
+    }
+} // G2pTest
diff --git a/src/tests/G2pTest/Common.h b/src/tests/G2pTest/Common.h
new file mode 100644
index 0000000..d6686b5
--- /dev/null
+++ b/src/tests/G2pTest/Common.h
@@ -0,0 +1,16 @@
+#ifndef DATASET_TOOLS_COMMON_H
+#define DATASET_TOOLS_COMMON_H
+
+#include <QFile>
+#include <QStringList>
+#include <QTextStream>
+
+namespace G2pTest {
+
+    /// Reads the text file at `filename` and returns its lines in order.
+    /// Returns an empty list when the file cannot be opened.
+    QStringList readData(const QString &filename);
+
+} // G2pTest
+
+#endif // DATASET_TOOLS_COMMON_H
diff --git a/src/tests/G2pTest/JyuptingTest.cpp b/src/tests/G2pTest/JyuptingTest.cpp
new file mode 100644
index 0000000..3660fc6
--- /dev/null
+++ b/src/tests/G2pTest/JyuptingTest.cpp
@@ -0,0 +1,138 @@
+#include "JyuptingTest.h"
+#include "Common.h"
+
+#include <QDebug>
+#include <QTextCodec>
+
+namespace G2pTest {
+    JyuptingTest::JyuptingTest() : g2p_can(new IKg2p::ZhG2p("cantonese")) {
+    }
+
+    JyuptingTest::~JyuptingTest() {
+    }
+
+    /// Converts one mixed Chinese/digit/symbol sample with the tone flag off:
+    /// first with digits spelled out, then with digits passed through.
+    /// Logs the inputs and returns false on the first mismatch.
+    bool JyuptingTest::convertNumTest() {
+        QString raw1 = "明月@1几32时有##一";
+        QString tar1 = "ming jyut jat gei saam ji si jau jat";
+        QString res1 = g2p_can->convert(raw1, false, true);
+        if (res1 != tar1) {
+            qDebug() << "raw1:" << raw1;
+            qDebug() << "tar1:" << tar1;
+            qDebug() << "res1:" << res1;
+            return false;
+        }
+
+        QString raw2 = "明月@1几32时有##一";
+        QString tar2 = "ming jyut 1 gei 3 2 si jau jat";
+        // Feed raw2 (not raw1) so the logged input is the one actually converted.
+        QString res2 = g2p_can->convert(raw2, false, false);
+        if (res2 != tar2) {
+            qDebug() << "raw2:" << raw2;
+            qDebug() << "tar2:" << tar2;
+            qDebug() << "res2:" << res2;
+            return false;
+        }
+
+        qDebug() << "\nconvertNumTest: success";
+        return true;
+    }
+
+    /// Same two cases as convertNumTest() but with the tone flag on, so the
+    /// expected syllables carry tone digits.
+    /// Logs the inputs and returns false on the first mismatch.
+    bool JyuptingTest::removeToneTest() {
+        QString raw1 = "明月@1几32时有##一";
+        QString tar1 = "ming4 jyut6 jat1 gei2 saam1 ji6 si4 jau5 jat1";
+        QString res1 = g2p_can->convert(raw1, true, true);
+        if (res1 != tar1) {
+            qDebug() << "raw1:" << raw1;
+            qDebug() << "tar1:" << tar1;
+            qDebug() << "res1:" << res1;
+            return false;
+        }
+
+        QString raw2 = "明月@1几32时有##一";
+        QString tar2 = "ming4 jyut6 1 gei2 3 2 si4 jau5 jat1";
+        QString res2 = g2p_can->convert(raw2, true, false);
+        if (res2 != tar2) {
+            qDebug() << "raw2:" << raw2;
+            qDebug() << "tar2:" << tar2;
+            qDebug() << "res2:" << res2;
+            return false;
+        }
+
+        qDebug() << "\nremoveToneTest: success";
+        return true;
+    }
+
+    /// Runs the converter over "text|expected" lines and prints the
+    /// per-syllable mismatch rate; an expected syllable may list '/'-separated
+    /// alternatives. Returns false when nothing comparable was loaded.
+    /// @param resDisplay when true, prints each line that contains a mismatch.
+    bool JyuptingTest::batchTest(bool resDisplay) {
+        int count = 0;
+        int error = 0;
+
+        QTextCodec *utf8 = QTextCodec::codecForName("UTF-8");
+        // Cantonese reference data; op_lab.txt is the Mandarin set.
+        const QStringList dataLines = readData(R"(D:\projects\dataset-tools\jyutping_test.txt)");
+        for (const QString &line : dataLines) {
+            // NOTE(review): re-decodes the already-decoded line as UTF-8; presumably the
+            // data file is UTF-8 - confirm, or set the codec inside readData instead.
+            const QString trimmedLine = utf8->toUnicode(line.toLocal8Bit()).trimmed();
+            const QStringList keyValuePair = trimmedLine.split('|');
+
+            if (keyValuePair.size() != 2) {
+                qDebug() << keyValuePair;
+                continue;
+            }
+
+            QString key = keyValuePair[0];
+            QString value = keyValuePair[1];
+            QString result = g2p_can->convert(key, false, true);
+
+            const QStringList words = value.split(" ");
+            const QStringList resWords = result.split(" ");
+            const int wordSize = words.size();
+            count += wordSize;
+
+            bool diff = false;
+            for (int i = 0; i < wordSize; i++) {
+                // A missing result syllable counts as a mismatch instead of
+                // indexing past the end of resWords.
+                const bool matched = i < resWords.size() &&
+                    (words[i] == resWords[i] || words[i].split("/").contains(resWords[i]));
+                if (!matched) {
+                    diff = true;
+                    error++;
+                }
+            }
+
+            if (resDisplay && diff) {
+                qDebug() << "text: " << key;
+                qDebug() << "raw: " << value;
+                qDebug() << "res: " << result;
+            }
+        }
+
+        if (count == 0) {
+            // Avoids 0/0 below when the file is missing or has no valid rows.
+            qDebug() << "batchTest: no data loaded";
+            return false;
+        }
+
+        const double percentage = static_cast<double>(error) / static_cast<double>(count) * 100.0;
+
+        qDebug() << "\n--------------------";
+        qDebug() << "batchTest: success";
+        qDebug() << "错误率: " << percentage << "%";
+        qDebug() << "错误数: " << error;
+        qDebug() << "总字数: " << count;
+        qDebug() << "--------------------";
+        return true;
+    }
+
+} // G2pTest
diff --git a/src/tests/G2pTest/JyuptingTest.h b/src/tests/G2pTest/JyuptingTest.h
new file mode 100644
index 0000000..87bbb63
--- /dev/null
+++ b/src/tests/G2pTest/JyuptingTest.h
@@ -0,0 +1,26 @@
+#ifndef DATASET_TOOLS_JYUPTINGTEST_H
+#define DATASET_TOOLS_JYUPTINGTEST_H
+
+#include <QScopedPointer>
+
+#include "g2pglobal.h"
+#include "zhg2p.h"
+
+namespace G2pTest {
+
+    /// Self-checks for the "cantonese" IKg2p::ZhG2p converter.
+    class JyuptingTest {
+    public:
+        explicit JyuptingTest();
+        ~JyuptingTest();
+        bool convertNumTest();
+        bool removeToneTest();
+        bool batchTest(bool resDisplay = false);
+
+    private:
+        QScopedPointer<IKg2p::ZhG2p> g2p_can;
+    };
+
+} // G2pTest
+
+#endif // DATASET_TOOLS_JYUPTINGTEST_H
diff --git a/src/tests/G2pTest/ManTest.cpp b/src/tests/G2pTest/ManTest.cpp
new file mode 100644
index 0000000..830c2c1
--- /dev/null
+++ b/src/tests/G2pTest/ManTest.cpp
@@ -0,0 +1,138 @@
+#include "ManTest.h"
+#include "Common.h"
+
+#include <QDebug>
+#include <QTextCodec>
+
+namespace G2pTest {
+    ManTest::ManTest() : g2p_zh(new IKg2p::ZhG2p("mandarin")) {
+    }
+
+    ManTest::~ManTest() {
+    }
+
+    /// Converts one mixed Chinese/digit/symbol sample with the tone flag off:
+    /// first with digits spelled out, then with digits passed through.
+    /// Logs the inputs and returns false on the first mismatch.
+    bool ManTest::convertNumTest() {
+        QString raw1 = "明月@1几32时有##一";
+        QString tar1 = "ming yue yi ji san er shi you yi";
+        QString res1 = g2p_zh->convert(raw1, false, true);
+        if (res1 != tar1) {
+            qDebug() << "raw1:" << raw1;
+            qDebug() << "tar1:" << tar1;
+            qDebug() << "res1:" << res1;
+            return false;
+        }
+
+        QString raw2 = "明月@1几32时有##一";
+        QString tar2 = "ming yue 1 ji 3 2 shi you yi";
+        // Feed raw2 (not raw1) so the logged input is the one actually converted.
+        QString res2 = g2p_zh->convert(raw2, false, false);
+        if (res2 != tar2) {
+            qDebug() << "raw2:" << raw2;
+            qDebug() << "tar2:" << tar2;
+            qDebug() << "res2:" << res2;
+            return false;
+        }
+
+        qDebug() << "\nconvertNumTest: success";
+        return true;
+    }
+
+    /// Same two cases as convertNumTest() but with the tone flag on, so the
+    /// expected syllables carry tone digits.
+    /// Logs the inputs and returns false on the first mismatch.
+    bool ManTest::removeToneTest() {
+        QString raw1 = "明月@1几32时有##一";
+        QString tar1 = "ming2 yue4 yi1 ji3 san1 er4 shi2 you3 yi1";
+        QString res1 = g2p_zh->convert(raw1, true, true);
+        if (res1 != tar1) {
+            qDebug() << "raw1:" << raw1;
+            qDebug() << "tar1:" << tar1;
+            qDebug() << "res1:" << res1;
+            return false;
+        }
+
+        QString raw2 = "明月@1几32时有##一";
+        QString tar2 = "ming2 yue4 1 ji3 3 2 shi2 you3 yi1";
+        QString res2 = g2p_zh->convert(raw2, true, false);
+        if (res2 != tar2) {
+            qDebug() << "raw2:" << raw2;
+            qDebug() << "tar2:" << tar2;
+            qDebug() << "res2:" << res2;
+            return false;
+        }
+
+        qDebug() << "\nremoveToneTest: success";
+        return true;
+    }
+
+    /// Runs the converter over "text|expected" lines and prints the
+    /// per-syllable mismatch rate; an expected syllable may list '/'-separated
+    /// alternatives. Returns false when nothing comparable was loaded.
+    /// @param resDisplay when true, prints each line that contains a mismatch.
+    bool ManTest::batchTest(bool resDisplay) {
+        int count = 0;
+        int error = 0;
+
+        QTextCodec *utf8 = QTextCodec::codecForName("UTF-8");
+        // Mandarin reference data.
+        const QStringList dataLines = readData(R"(D:\projects\dataset-tools\op_lab.txt)");
+        for (const QString &line : dataLines) {
+            // NOTE(review): re-decodes the already-decoded line as UTF-8; presumably the
+            // data file is UTF-8 - confirm, or set the codec inside readData instead.
+            const QString trimmedLine = utf8->toUnicode(line.toLocal8Bit()).trimmed();
+            const QStringList keyValuePair = trimmedLine.split('|');
+
+            if (keyValuePair.size() != 2) {
+                qDebug() << keyValuePair;
+                continue;
+            }
+
+            QString key = keyValuePair[0];
+            QString value = keyValuePair[1];
+            QString result = g2p_zh->convert(key, false, true);
+
+            const QStringList words = value.split(" ");
+            const QStringList resWords = result.split(" ");
+            const int wordSize = words.size();
+            count += wordSize;
+
+            bool diff = false;
+            for (int i = 0; i < wordSize; i++) {
+                // A missing result syllable counts as a mismatch instead of
+                // indexing past the end of resWords.
+                const bool matched = i < resWords.size() &&
+                    (words[i] == resWords[i] || words[i].split("/").contains(resWords[i]));
+                if (!matched) {
+                    diff = true;
+                    error++;
+                }
+            }
+
+            if (resDisplay && diff) {
+                qDebug() << "text: " << key;
+                qDebug() << "raw: " << value;
+                qDebug() << "res: " << result;
+            }
+        }
+
+        if (count == 0) {
+            // Avoids 0/0 below when the file is missing or has no valid rows.
+            qDebug() << "batchTest: no data loaded";
+            return false;
+        }
+
+        const double percentage = static_cast<double>(error) / static_cast<double>(count) * 100.0;
+
+        qDebug() << "\n--------------------";
+        qDebug() << "batchTest: success";
+        qDebug() << "错误率: " << percentage << "%";
+        qDebug() << "错误数: " << error;
+        qDebug() << "总字数: " << count;
+        qDebug() << "--------------------";
+        return true;
+    }
+
+} // G2pTest
diff --git a/src/tests/G2pTest/ManTest.h b/src/tests/G2pTest/ManTest.h
new file mode 100644
index 0000000..a5b855d
--- /dev/null
+++ b/src/tests/G2pTest/ManTest.h
@@ -0,0 +1,26 @@
+#ifndef DATASET_TOOLS_MANTEST_H
+#define DATASET_TOOLS_MANTEST_H
+
+#include <QScopedPointer>
+
+#include "g2pglobal.h"
+#include "zhg2p.h"
+
+namespace G2pTest {
+
+    /// Self-checks for the "mandarin" IKg2p::ZhG2p converter.
+    class ManTest {
+    public:
+        explicit ManTest();
+        ~ManTest();
+        bool convertNumTest();
+        bool removeToneTest();
+        bool batchTest(bool resDisplay = false);
+
+    private:
+        QScopedPointer<IKg2p::ZhG2p> g2p_zh;
+    };
+
+} // G2pTest
+
+#endif // DATASET_TOOLS_MANTEST_H
diff --git a/src/tests/G2pTest/main.cpp b/src/tests/G2pTest/main.cpp
index 468641d..7b701f2 100644
--- a/src/tests/G2pTest/main.cpp
+++ b/src/tests/G2pTest/main.cpp
@@ -4,92 +4,40 @@
 #include "g2pglobal.h"
 #include "zhg2p.h"
 #include
-#include
-#include
 #include
 #include
 #include
-#include
-#include
 #include
-#include
 #include
-#include
 
-QStringList readData(const QString &filename) {
-    QStringList dataLines;
-    QFile file(filename);
-    if (file.open(QIODevice::ReadOnly | QIODevice::Text)) {
-        QTextStream in(&file);
-        while (!in.atEnd()) {
-            QString line = in.readLine();
-            dataLines.append(line);
-        }
-        file.close();
-    }
-    return dataLines;
-}
+#include "JyuptingTest.h"
+#include "ManTest.h"
+
+using namespace G2pTest;
 
 int main(int argc, char *argv[]) {
     QCoreApplication app(argc, argv);
     IKg2p::setDictionaryPath(qApp->applicationDirPath() + "/dict");
 
-    QTextCodec *utf8 = QTextCodec::codecForName("UTF-8");
-    QStringList dataLines;
-    bool mandarin = true;
-    bool resDisplay = true;
-
-    IKg2p::ZhG2p *zhG2p;
-    if (mandarin) {
-        zhG2p = new IKg2p::ZhG2p("mandarin");
-        dataLines = readData(R"(D:\projects\dataset-tools\op_lab.txt)");
-    } else {
-        zhG2p = new IKg2p::ZhG2p("cantonese");
-        dataLines = readData(R"(D:\projects\dataset-tools\jyutping_test.txt)");
-    }
-
-    int count = 0;
-    int error = 0;
-
-
-    foreach (const QString &line, dataLines) {
-        QString trimmedLine = utf8->toUnicode(line.toLocal8Bit()).trimmed();
-        QStringList keyValuePair = trimmedLine.split('|');
+    // Track every result so the process exit code reflects failures.
+    bool allPassed = true;
+
+    qDebug() << "**************************";
+    qDebug() << "mandarinTest: ";
+    ManTest mandarinTest;
+    allPassed &= mandarinTest.convertNumTest();
+    allPassed &= mandarinTest.removeToneTest();
+    // mandarinTest.batchTest();
+    qDebug() << "\n";
 
-        if (keyValuePair.size() == 2) {
-            QString key = keyValuePair[0];
+    qDebug() << "**************************";
 
-            QString value = keyValuePair[1];
-            QString result = zhG2p->convert(key, false, true);
+    qDebug() << "jyutpingTest: ";
+    JyuptingTest jyutpingTest;
+    allPassed &= jyutpingTest.convertNumTest();
+    allPassed &= jyutpingTest.removeToneTest();
+    // jyutpingTest.batchTest();
+    qDebug() << "\n";
 
-            QStringList words = value.split(" ");
-            int wordSize = words.size();
-            count += wordSize;
-
-            bool diff = false;
-            QStringList resWords = result.split(" ");
-            for (int i = 0; i < wordSize; i++) {
-                if (words[i] != resWords[i] && !words[i].split("/").contains(resWords[i])) {
-                    diff = true;
-                    error++;
-                }
-            }
-
-            if (resDisplay && diff) {
-                qDebug() << "text: " << key;
-                qDebug() << "raw: " << value;
-                qDebug() << "res: " << result;
-            }
-        } else {
-            qDebug() << keyValuePair;
-        }
-    }
-
-    double percentage = ((double) error / (double) count) * 100.0;
-
-    qDebug() << "错误率: " << percentage << "%";
-    qDebug() << "错误数: " << error;
-    qDebug() << "总字数: " << count;
-
-    return app.exec();
-}
\ No newline at end of file
+    return allPassed ? 0 : 1;
+}