-
-
Notifications
You must be signed in to change notification settings - Fork 0
/
merge-ipa.js
116 lines (99 loc) · 4.92 KB
/
merge-ipa.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
const StreamZip = require('node-stream-zip');
const { execSync } = require('child_process');
const { readdirSync, existsSync, readFileSync, writeFileSync, unlinkSync, mkdirSync } = require('fs');
const { writeInBatches } = require('./util/util');
const date = require('date-and-time');
// Timestamp captured once at start-up; main() formats it into the "revision" field of each index.
const now = new Date();
// Scratch directory where main() stages the index and bank files before zipping them.
const tempFolder = 'data/temp/ipa';
// Create the scratch directory (including missing parents) before anything tries to list or write into it.
mkdirSync(tempFolder, { recursive: true });
/**
 * Read every IPA bank and tag bank out of one dictionary archive.
 *
 * Entries whose name starts with `term_meta_bank_` are parsed as JSON and
 * concatenated into `entries`; entries starting with `tag_bank_` go into `tags`.
 * The archive is closed even when reading or parsing fails.
 *
 * @param {string} file - Path of the zip archive to read.
 * @returns {Promise<{entries: Array, tags: Array}>} Concatenated bank contents.
 */
async function readIpaArchive(file) {
    const zip = new StreamZip.async({ file });
    try {
        let entries = [];
        let tags = [];
        for (const name of Object.keys(await zip.entries())) {
            if (name.startsWith("term_meta_bank_")) {
                entries = entries.concat(JSON.parse(await zip.entryData(name)));
            } else if (name.startsWith("tag_bank_")) {
                tags = tags.concat(JSON.parse(await zip.entryData(name)));
            }
        }
        return { entries, tags };
    } finally {
        // Release the file handle even if an entry could not be read or parsed.
        await zip.close();
    }
}

/**
 * Merge one archive's IPA entries and tag definitions into the accumulators.
 *
 * Each entry is an array whose first element is the term and whose third
 * element carries a `transcriptions` array of `{ ipa, tags }` objects. Each tag
 * row is an array whose first element is the tag id.
 *
 * - A term seen for the first time is stored as-is.
 * - For a known term, transcriptions with a new `ipa` value are appended, and
 *   transcriptions with a known `ipa` value only gain the tags they lacked.
 * - Every tag that ends up in the merged data and has a definition in
 *   `localTags` is recorded once in `globalTags` (first definition wins).
 *
 * Both accumulators are mutated in place; stored entries are shared with
 * `localIpa`, not copied.
 *
 * @param {Map<string, Array>} globalIpa - Merged entries keyed by term.
 * @param {Map<string, Array>} globalTags - Merged tag rows keyed by tag id.
 * @param {Array} localIpa - Entries read from one archive.
 * @param {Array} localTags - Tag rows read from the same archive.
 * @returns {void}
 */
function mergeIpaEntries(globalIpa, globalTags, localIpa, localTags) {
    // Index the local definitions once; keep the first row per id, like a linear search would.
    const localTagById = new Map();
    for (const tagRow of localTags) {
        if (!localTagById.has(tagRow[0])) {
            localTagById.set(tagRow[0], tagRow);
        }
    }

    const registerTag = (tagId) => {
        const tagRow = localTagById.get(tagId);
        if (tagRow && !globalTags.has(tagId)) {
            globalTags.set(tagId, tagRow);
        }
    };

    for (const local of localIpa) {
        const [term] = local;
        const existing = globalIpa.get(term);

        if (!existing) {
            globalIpa.set(term, local);
            // The tags of a first-seen entry are part of the merged output too,
            // so their definitions must reach the merged tag bank.
            const transcriptions = (local[2] && local[2]['transcriptions']) || [];
            for (const transcription of transcriptions) {
                for (const tagId of transcription.tags || []) {
                    registerTag(tagId);
                }
            }
            continue;
        }

        const existingIpas = existing[2]['transcriptions'];
        for (const newIpa of local[2]['transcriptions']) {
            const existingIpa = existingIpas.find(({ ipa }) => ipa === newIpa.ipa);
            if (!existingIpa) {
                existingIpas.push(newIpa);
                for (const tagId of newIpa.tags) {
                    registerTag(tagId);
                }
            } else {
                const missingTags = newIpa.tags.filter((tagId) => !existingIpa.tags.includes(tagId));
                for (const tagId of missingTags) {
                    existingIpa.tags.push(tagId);
                    registerTag(tagId);
                }
            }
        }
    }
}

/**
 * Build one merged IPA dictionary per source language.
 *
 * For every source language listed in `languages.json`, all existing
 * `kty-<source>-<target>-ipa.zip` archives in the working directory are merged.
 * When at least one entry was merged, the scratch folder is emptied, the index
 * and bank files are staged there, and the result is zipped into
 * `data/language/<source>/` next to a standalone copy of the index.
 *
 * @returns {Promise<void>}
 */
async function main() {
    const languages = JSON.parse(readFileSync('languages.json', 'utf8'));
    const url = 'https://github.com/Ajatt-Tools/kaikki-to-rikaitan';

    for (const { iso: sourceIso } of languages) {
        // Maps rather than plain objects: terms such as "constructor" must not
        // collide with members inherited from Object.prototype.
        const globalIpa = new Map();
        const globalTags = new Map();

        for (const { iso: targetIso } of languages) {
            const file = `kty-${sourceIso}-${targetIso}-ipa.zip`;
            if (!existsSync(file)) {
                continue;
            }
            const { entries: localIpa, tags: localTags } = await readIpaArchive(file);
            console.log("localIpa", localIpa.length);
            console.log("localTags", localTags.length);
            mergeIpaEntries(globalIpa, globalTags, localIpa, localTags);
        }

        // Nothing merged for this source language: nothing to log or write.
        if (globalIpa.size === 0) {
            continue;
        }
        console.log("globalIpa", globalIpa.size);
        if (globalTags.size > 0) {
            console.log("globalTags", globalTags.size);
        }

        const title = `kty-${sourceIso}-ipa`;
        const latestReleaseUrl = `${url}/releases/latest/download/${title}`;
        const globalIndex = {
            "format": 3,
            "revision": date.format(now, 'YYYY.MM.DD'),
            "sequenced": true,
            title,
            url,
            "isUpdatable": true,
            "indexUrl": `${latestReleaseUrl}-index.json`,
            "downloadUrl": `${latestReleaseUrl}.zip`,
        };
        const indexJson = JSON.stringify(globalIndex, null, 4);

        // Empty the scratch folder so files staged for a previous language are not zipped again.
        for (const staleFile of readdirSync(tempFolder)) {
            unlinkSync(`${tempFolder}/${staleFile}`);
        }

        writeFileSync(`${tempFolder}/index.json`, indexJson);
        writeInBatches(tempFolder, Array.from(globalIpa.values()), 'term_meta_bank_', 500000);
        writeInBatches(tempFolder, Array.from(globalTags.values()), 'tag_bank_', 50000);

        const outputFolder = `data/language/${sourceIso}`;
        mkdirSync(outputFolder, { recursive: true });
        // -j stores the staged files without their directory prefix.
        execSync(`zip -j ${outputFolder}/${title}.zip ${tempFolder}/*`);
        writeFileSync(`${outputFolder}/${title}-index.json`, indexJson);
    }
}
// Run the merge. Report any failure explicitly and exit non-zero so that
// calling scripts notice it instead of relying on unhandled-rejection defaults.
main().catch((error) => {
    console.error(error);
    process.exitCode = 1;
});