From 5db775f57a12ae55b5d44b0238f4443a68ed6805 Mon Sep 17 00:00:00 2001
From: btangmu
Date: Tue, 19 Nov 2024 18:17:43 -0500
Subject: [PATCH] CLDR-7428 Freeze collators; new class CollatorHelper
-Public CollatorHelper replaces old private versions in CLDRConfig and MapComparator
-New CollatorHelper.ROOT_ORDER, CollatorHelper.ROOT_NUMERIC, CollatorHelper.EMOJI_COLLATOR
-Use those shared collators wherever possible instead of creating a new collator at each call site
-Fix mistakes from previous commits where freeze() was called before the collator was configured (e.g. before setStrength or setNumericCollation)
-Remove dead code
---
.../unicode/cldr/unittest/web/TestAll.java | 70 -------------------
.../org/unicode/cldr/icu/ExtractICUData.java | 14 +---
.../org/unicode/cldr/tool/CLDRModify.java | 3 +-
.../org/unicode/cldr/tool/ChartCollation.java | 2 -
.../cldr/tool/ChartLanguageGroups.java | 8 +--
.../org/unicode/cldr/tool/CompareData.java | 11 ---
.../cldr/tool/ConvertLanguageData.java | 11 +--
.../org/unicode/cldr/tool/CountItems.java | 4 +-
.../unicode/cldr/tool/GenerateComparison.java | 4 +-
.../unicode/cldr/tool/GenerateStatistics.java | 5 +-
.../cldr/tool/GenerateTransformCharts.java | 4 +-
.../unicode/cldr/tool/MakeTransliterator.java | 3 +-
.../main/java/org/unicode/cldr/tool/Misc.java | 4 +-
.../java/org/unicode/cldr/tool/ShowData.java | 11 ---
.../org/unicode/cldr/tool/TablePrinter.java | 3 +-
.../org/unicode/cldr/util/CLDRConfig.java | 25 -------
.../org/unicode/cldr/util/CollatorHelper.java | 32 +++++++++
.../org/unicode/cldr/util/MapComparator.java | 20 +-----
18 files changed, 58 insertions(+), 176 deletions(-)
create mode 100644 tools/cldr-code/src/main/java/org/unicode/cldr/util/CollatorHelper.java
diff --git a/tools/cldr-apps/src/test/java/org/unicode/cldr/unittest/web/TestAll.java b/tools/cldr-apps/src/test/java/org/unicode/cldr/unittest/web/TestAll.java
index 0e8533ecd39..5fdaea6cbb8 100644
--- a/tools/cldr-apps/src/test/java/org/unicode/cldr/unittest/web/TestAll.java
+++ b/tools/cldr-apps/src/test/java/org/unicode/cldr/unittest/web/TestAll.java
@@ -7,9 +7,6 @@
import com.ibm.icu.dev.test.TestFmwk.TestGroup;
import com.ibm.icu.dev.test.TestLog;
-import com.ibm.icu.text.Collator;
-import com.ibm.icu.text.RuleBasedCollator;
-import com.ibm.icu.util.ULocale;
import java.io.File;
import java.io.PrintWriter;
import java.sql.SQLException;
@@ -17,12 +14,8 @@
import org.unicode.cldr.test.CheckCLDR;
import org.unicode.cldr.util.CLDRConfig;
import org.unicode.cldr.util.CLDRConfig.Environment;
-import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.CLDRPaths;
import org.unicode.cldr.util.CldrUtility;
-import org.unicode.cldr.util.Factory;
-import org.unicode.cldr.util.StandardCodes;
-import org.unicode.cldr.util.SupplementalDataInfo;
import org.unicode.cldr.web.CLDRProgressIndicator;
import org.unicode.cldr.web.DBUtils;
import org.unicode.cldr.web.SurveyLog;
@@ -129,13 +122,6 @@ public TestAll() {
public static class WebTestInfo {
private static WebTestInfo INSTANCE = null;
- private SupplementalDataInfo supplementalDataInfo;
- private StandardCodes sc;
- private Factory cldrFactory;
- private CLDRFile english;
- private CLDRFile root;
- private RuleBasedCollator col;
-
public static WebTestInfo getInstance() {
synchronized (WebTestInfo.class) {
if (INSTANCE == null) {
@@ -146,62 +132,6 @@ public static WebTestInfo getInstance() {
}
private WebTestInfo() {}
-
- public SupplementalDataInfo getSupplementalDataInfo() {
- synchronized (this) {
- if (supplementalDataInfo == null) {
- supplementalDataInfo =
- SupplementalDataInfo.getInstance(CLDRPaths.SUPPLEMENTAL_DIRECTORY);
- }
- }
- return supplementalDataInfo;
- }
-
- public StandardCodes getStandardCodes() {
- synchronized (this) {
- if (sc == null) {
- sc = StandardCodes.make();
- }
- }
- return sc;
- }
-
- public Factory getCldrFactory() {
- synchronized (this) {
- if (cldrFactory == null) {
- cldrFactory = Factory.make(CLDRPaths.MAIN_DIRECTORY, ".*");
- }
- }
- return cldrFactory;
- }
-
- public CLDRFile getEnglish() {
- synchronized (this) {
- if (english == null) {
- english = getCldrFactory().make("en", true);
- }
- }
- return english;
- }
-
- public CLDRFile getRoot() {
- synchronized (this) {
- if (root == null) {
- root = getCldrFactory().make("root", true);
- }
- }
- return root;
- }
-
- public Collator getCollator() {
- synchronized (this) {
- if (col == null) {
- col = (RuleBasedCollator) Collator.getInstance(ULocale.ROOT);
- col.setNumericCollation(true);
- }
- }
- return col;
- }
}
static boolean dbSetup = false;
diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/icu/ExtractICUData.java b/tools/cldr-code/src/main/java/org/unicode/cldr/icu/ExtractICUData.java
index 8fae756845f..f9ee6673215 100644
--- a/tools/cldr-code/src/main/java/org/unicode/cldr/icu/ExtractICUData.java
+++ b/tools/cldr-code/src/main/java/org/unicode/cldr/icu/ExtractICUData.java
@@ -12,9 +12,7 @@
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.lang.UProperty;
import com.ibm.icu.text.Collator;
-import com.ibm.icu.text.RuleBasedCollator;
import com.ibm.icu.text.Transliterator;
-import com.ibm.icu.util.ULocale;
import com.ibm.icu.util.UResourceBundle;
import java.io.BufferedReader;
import java.io.File;
@@ -35,6 +33,7 @@
import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.CLDRPaths;
import org.unicode.cldr.util.CldrUtility;
+import org.unicode.cldr.util.CollatorHelper;
import org.unicode.cldr.util.PathUtilities;
import org.unicode.cldr.util.PatternCache;
import org.unicode.cldr.util.SimpleFactory;
@@ -411,9 +410,7 @@ static void testProps() {
{UProperty.DOUBLE_START, UProperty.DOUBLE_START},
{UProperty.STRING_START, UProperty.STRING_LIMIT},
};
- Collator col = Collator.getInstance(ULocale.ROOT); // freeze below
- ((RuleBasedCollator) col).setNumericCollation(true);
- col = col.freeze();
+ Collator col = CollatorHelper.ROOT_NUMERIC;
Map> alpha = new TreeMap<>(col);
for (int range = 0; range < ranges.length; ++range) {
@@ -466,13 +463,6 @@ static void testProps() {
}
out.println("");
}
- Collator c = Collator.getInstance(ULocale.ROOT); // freeze below
- ((RuleBasedCollator) c).setNumericCollation(true);
- c = c.freeze();
-
- // int enumValue = UCharacter.getIntPropertyValue(codePoint, propEnum);
- // return UCharacter.getPropertyValueName(propEnum,enumValue, (int)nameChoice);
-
}
private static String getName(int index, String valueName, String shortValueName) {
diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/CLDRModify.java b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/CLDRModify.java
index 1c2de21f95e..4fce95ad9a8 100644
--- a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/CLDRModify.java
+++ b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/CLDRModify.java
@@ -3309,8 +3309,9 @@ private static int stepsFromRoot(String origLoc) {
/** Internal */
public static void testJavaSemantics() {
- Collator caseInsensitive = Collator.getInstance(ULocale.ROOT).freeze();
+ Collator caseInsensitive = Collator.getInstance(ULocale.ROOT); // freeze below
caseInsensitive.setStrength(Collator.SECONDARY);
+ caseInsensitive = caseInsensitive.freeze();
Set setWithCaseInsensitive = new TreeSet<>(caseInsensitive);
setWithCaseInsensitive.addAll(Arrays.asList(new String[] {"a", "b", "c"}));
Set plainSet = new TreeSet<>();
diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ChartCollation.java b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ChartCollation.java
index c8791236970..425cf126feb 100644
--- a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ChartCollation.java
+++ b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ChartCollation.java
@@ -258,8 +258,6 @@ private void addCollator(Map data, String type, RuleBasedCollator
dataItem.collator = col;
}
- // RuleBasedCollator ROOT = (RuleBasedCollator) Collator.getInstance(ULocale.ROOT).freeze();
-
private class Subchart extends Chart {
private static final String HIGH_COLLATION_PRIMARY = "\uFFFF";
String title;
diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ChartLanguageGroups.java b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ChartLanguageGroups.java
index fb7849d571e..9e67b0799ec 100644
--- a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ChartLanguageGroups.java
+++ b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ChartLanguageGroups.java
@@ -3,8 +3,6 @@
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.ImmutableSet.Builder;
import com.google.common.collect.Multimap;
-import com.ibm.icu.text.Collator;
-import com.ibm.icu.util.ULocale;
import java.io.IOException;
import java.util.Collection;
import java.util.Comparator;
@@ -76,8 +74,6 @@ public String getExplanation() {
+ "The data doesn't completely match wikipedia’s; there are some patches for CLDR languages.
\n";
}
- Collator ENGLISH_ORDER = Collator.getInstance(ULocale.ROOT).freeze();
-
@Override
public void writeContents(FormattedFileWriter pw) throws IOException {
@@ -112,7 +108,9 @@ private void show(Multimap lg, String parent, TablePrinter table
new Comparator>() {
@Override
public int compare(Pair o1, Pair o2) {
- int diff = ENGLISH_ORDER.compare(o1.getFirst(), o2.getFirst());
+ int diff =
+ CollatorHelper.ROOT_ORDER.compare(
+ o1.getFirst(), o2.getFirst());
if (diff != 0) {
return diff;
}
diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/CompareData.java b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/CompareData.java
index 047096d6e5c..a170825defd 100644
--- a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/CompareData.java
+++ b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/CompareData.java
@@ -1,9 +1,6 @@
package org.unicode.cldr.tool;
import com.ibm.icu.dev.util.UOption;
-import com.ibm.icu.text.Collator;
-import com.ibm.icu.text.RuleBasedCollator;
-import com.ibm.icu.util.ULocale;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
@@ -30,14 +27,6 @@ public class CompareData {
String[] directoryList = {"main", "collation", "segmentations"};
- static RuleBasedCollator uca =
- (RuleBasedCollator) Collator.getInstance(ULocale.ROOT); // freeze below
-
- {
- uca.setNumericCollation(true);
- uca = (RuleBasedCollator) uca.freeze();
- }
-
static PrettyPath prettyPathMaker = new PrettyPath();
static CLDRFile english;
static Set locales;
diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ConvertLanguageData.java b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ConvertLanguageData.java
index 8eb9d1b50e0..08e61d4f566 100644
--- a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ConvertLanguageData.java
+++ b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ConvertLanguageData.java
@@ -6,9 +6,7 @@
import com.ibm.icu.impl.Relation;
import com.ibm.icu.impl.Row;
import com.ibm.icu.impl.Row.R2;
-import com.ibm.icu.text.Collator;
import com.ibm.icu.text.NumberFormat;
-import com.ibm.icu.text.RuleBasedCollator;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.util.ULocale;
import java.io.BufferedReader;
@@ -42,6 +40,7 @@
import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.CLDRPaths;
import org.unicode.cldr.util.CldrUtility;
+import org.unicode.cldr.util.CollatorHelper;
import org.unicode.cldr.util.Factory;
import org.unicode.cldr.util.Iso639Data;
import org.unicode.cldr.util.Iso639Data.Scope;
@@ -1924,12 +1923,6 @@ public String toString() {
public static class GeneralCollator implements Comparator {
static UTF16.StringComparator cpCompare = new UTF16.StringComparator(true, false, 0);
- static RuleBasedCollator UCA =
- (RuleBasedCollator) Collator.getInstance(ULocale.ROOT).freeze();
-
- static {
- UCA.setNumericCollation(true);
- }
@Override
public int compare(String s1, String s2) {
@@ -1938,7 +1931,7 @@ public int compare(String s1, String s2) {
} else if (s2 == null) {
return 1;
}
- int result = UCA.compare(s1, s2);
+ int result = CollatorHelper.ROOT_NUMERIC.compare(s1, s2);
if (result != 0) return result;
return cpCompare.compare(s1, s2);
}
diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/CountItems.java b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/CountItems.java
index e1ac30f31c4..d485507e3d6 100644
--- a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/CountItems.java
+++ b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/CountItems.java
@@ -45,6 +45,7 @@
import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.CLDRPaths;
import org.unicode.cldr.util.CldrUtility;
+import org.unicode.cldr.util.CollatorHelper;
import org.unicode.cldr.util.Factory;
import org.unicode.cldr.util.Iso639Data;
import org.unicode.cldr.util.IsoCurrencyParser;
@@ -318,8 +319,7 @@ public static void genSupplementalZoneData() throws IOException {
}
public static void genSupplementalZoneData(boolean skipUnaliased) throws IOException {
- RuleBasedCollator col = (RuleBasedCollator) Collator.getInstance(ULocale.ROOT).freeze();
- col.setNumericCollation(true);
+ RuleBasedCollator col = (RuleBasedCollator) CollatorHelper.ROOT_NUMERIC;
StandardCodes sc = StandardCodes.make();
Map zone_country = sc.getZoneToCounty();
Map> country_zone = sc.getCountryToZoneSet();
diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/GenerateComparison.java b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/GenerateComparison.java
index fa777529bce..41fd242d7f2 100644
--- a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/GenerateComparison.java
+++ b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/GenerateComparison.java
@@ -5,7 +5,6 @@
import com.ibm.icu.text.Collator;
import com.ibm.icu.text.NumberFormat;
import com.ibm.icu.text.UTF16;
-import com.ibm.icu.util.ULocale;
import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
@@ -18,6 +17,7 @@
import org.unicode.cldr.util.CLDRFile.Status;
import org.unicode.cldr.util.CLDRPaths;
import org.unicode.cldr.util.CldrUtility;
+import org.unicode.cldr.util.CollatorHelper;
import org.unicode.cldr.util.Counter;
import org.unicode.cldr.util.EscapingUtilities;
import org.unicode.cldr.util.Factory;
@@ -30,7 +30,7 @@ public class GenerateComparison {
private static PrettyPath prettyPathMaker;
- private static Collator collator = Collator.getInstance(ULocale.ROOT).freeze();
+ private static Collator collator = CollatorHelper.ROOT_ORDER;
static class EnglishRowComparator implements Comparator> {
private static Comparator unicode = new UTF16.StringComparator(true, false, 0);
diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/GenerateStatistics.java b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/GenerateStatistics.java
index 0cee84b6075..997b3c022f6 100644
--- a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/GenerateStatistics.java
+++ b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/GenerateStatistics.java
@@ -26,6 +26,7 @@
import org.unicode.cldr.util.ArrayComparator;
import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.CldrUtility;
+import org.unicode.cldr.util.CollatorHelper;
import org.unicode.cldr.util.Factory;
import org.unicode.cldr.util.LanguageTagParser;
import org.unicode.cldr.util.Log;
@@ -40,7 +41,7 @@ class GenerateStatistics {
static CLDRFile english;
static Factory factory;
static LanguageTagParser ltp = new LanguageTagParser();
- static Collator col = Collator.getInstance(ULocale.ROOT).freeze();
+ static Collator col = CollatorHelper.ROOT_ORDER;
static boolean notitlecase = true;
public static void generateSize(
@@ -344,7 +345,7 @@ private static void addCounts(
private static class LanguageList implements Comparable