feat: introduce matchSource match template variable

- Change default fuzzy match pane template to include matchSource
- Add method MatchesVarExpansion#expandMatchSource
- Extend NearString.MATCH_SOURCE to have TM_SUBSEG
- Update test expectations of MatchesTextAreaTest and FindMatchesTest
- Add human-readable names of MATCH_SOURCE in Bundle.properties

Signed-off-by: Hiroshi Miura <[email protected]>
omegat-org · Dec 17, 2024 · 821d6a9 · 821d6a9
1 parent ecb65e3
commit 821d6a9
Show file tree

Hide file tree

Showing 7 changed files with 82 additions and 21 deletions.
diff --git a/src/org/omegat/Bundle.properties b/src/org/omegat/Bundle.properties
@@ -2947,3 +2947,8 @@ DICTIONARY_LOAD_FILE=Loaded dictionary from '{0}': {1} ms
 DICTIONARY_LOAD_ERROR=Error load dictionary from '{0}': {1} 
 DICTIONARY_MANAGER_ERROR_SAVE_IGNORE=Error saving ignore words"
 EDITOR_CONTROLLER_EXCEPTION=bad location exception when changing case
+
+MATCHES_COMES_FROM_TM=From TM
+MATCHES_COMES_FROM_FILES=Files
+MATCHES_COMES_FROM_MEMORY=From Project
+MATCHES_COMES_FROM_TM_SUBSEG=Sub-segmented match
diff --git a/src/org/omegat/core/matching/NearString.java b/src/org/omegat/core/matching/NearString.java
@@ -49,8 +49,11 @@
  */
 public class NearString {
     public enum MATCH_SOURCE {
-        MEMORY, TM, FILES
-    };
+        MEMORY,
+        TM,
+        FILES,
+        TM_SUBSEG;
+    }
 
     public enum SORT_KEY {
         SCORE, SCORE_NO_STEM, ADJUSTED_SCORE

diff --git a/src/org/omegat/core/statistics/FindMatches.java b/src/org/omegat/core/statistics/FindMatches.java
@@ -63,20 +63,19 @@
 
 /**
  * Class to find matches by specified criteria.
- *
+ * <p>
  * Since we can use stemmers to prepare tokens, we should use 3-pass comparison
  * of similarity. Similarity will be calculated in 3 steps:
- *
- * 1. Split original segment into word-only tokens using stemmer (with stop
- * words list), then compare tokens.
- *
- * 2. Split original segment into word-only tokens without stemmer, then compare
- * tokens.
- *
- * 3. Split original segment into not-only-words tokens (including numbers and
- * tags) without stemmer, then compare tokens.
- *
- * This class is not thread safe ! Must be used in the one thread only.
+ * <ol>
+ * <li>Split the original segment into word-only tokens using stemmer (with stop
+ * words list), then compare tokens.</li>
+ * <li>Split the original segment into word-only tokens without a stemmer,
+ * then compare tokens.</li>
+ * <li>Split the original segment into not-only-words tokens (including numbers
+ * and tags) without a stemmer, then compare tokens.</li>
+ * </ol>
+ * <p>
+ * This class is not thread-safe! It must be used from a single thread only.
  *
  * @author Maxym Mykhalchuk
  * @author Alex Buloichik ([email protected])
@@ -150,6 +149,23 @@ public FindMatches(IProject project, int maxCount, boolean allowSeparateSegmentM
                         OConsts.FUZZY_MATCH_THRESHOLD));
     }
 
+    /**
+     * Creates a FindMatches instance that finds fuzzy matches in translation
+     * memories.
+     *
+     * @param project
+     *            OmegaT project.
+     * @param segmenter
+     *            used when running a segmentation search.
+     * @param maxCount
+     *            limit the maximum count of the results.
+     * @param allowSeparateSegmentMatch
+     *            whether matching against separate segments is allowed.
+     * @param searchExactlyTheSame
+     *            allows searching similarities with the same text as a source
+     *            segment. This mode is used only for separate sentence match
+     *            in a paragraph project, i.e., where a source is just part of
+     *            the current source.
+     * @param applyThreshold
+     *            whether the fuzzy match threshold should be applied.
+     * @param threshold
+     *            threshold to use.
+     */
     public FindMatches(IProject project, Segmenter segmenter, int maxCount, boolean allowSeparateSegmentMatch,
             boolean searchExactlyTheSame, boolean applyThreshold, int threshold) {
         this.project = project;
@@ -165,6 +181,20 @@ public FindMatches(IProject project, Segmenter segmenter, int maxCount, boolean
         this.applyThreshold = applyThreshold;
     }
 
+    /**
+     * Searches translation memories for matches.
+     *
+     * @param searchText
+     *        segment text or term to search for.
+     * @param requiresTranslation
+     *        whether matched entries are required to have a translation.
+     * @param fillSimilarityData
+     *        fill similarity data into the resulting NearString objects.
+     * @param stop
+     *        IStopped callback object to indicate a cancel operation.
+     * @return
+     *        List of NearString objects that hold the matched translation entries.
+     * @throws StoppedException
+     *        raised when stopped during a search process.
+     */
     public List<NearString> search(String searchText, boolean requiresTranslation, boolean fillSimilarityData,
             IStopped stop) throws StoppedException {
         result = new ArrayList<>(OConsts.MAX_NEAR_STRINGS + 1);
@@ -235,8 +265,8 @@ public List<NearString> search(String searchText, boolean requiresTranslation, b
             for (ITMXEntry tmen : en.getValue().getEntries()) {
                 checkStopped(stop);
                 if (tmen.getSourceText() == null) {
-                    // Not all TMX entries have a source; in that case there can
-                    // be no meaningful match, so skip.
+                    // Not all TMX entries have a source; skip such entries
+                    // because no meaningful match is possible.
                     continue;
                 }
                 if (requiresTranslation && tmen.getTranslationText() == null) {
@@ -249,6 +279,7 @@ public List<NearString> search(String searchText, boolean requiresTranslation, b
                 processEntry(null, tmen, en.getKey(), NearString.MATCH_SOURCE.TM, false, tmenPenalty);
             }
         }
+
         // travel by all entries for check source file translations
         for (SourceTextEntry ste : project.getAllEntries()) {
             checkStopped(stop);
@@ -385,7 +416,7 @@ public void processEntry(EntryKey key, ITMXEntry entry, String tmxName,
         }
 
         // BUGS#1236 - stat display does not use threshold config check
-        if (applyThreshold && similarityStem < fuzzyMatchThreshold
+        if (fuzzyMatchThreshold > 0 && similarityStem < fuzzyMatchThreshold
                 && similarityNoStem < fuzzyMatchThreshold && simAdjusted < fuzzyMatchThreshold) {
             return;
         }

diff --git a/src/org/omegat/gui/matches/MatchesVarExpansion.java b/src/org/omegat/gui/matches/MatchesVarExpansion.java
@@ -90,19 +90,21 @@ public class MatchesVarExpansion extends VarExpansion<NearString> {
     public static final String VAR_DIFF_REVERSED = "${diffReversed}";
     public static final String VAR_SOURCE_LANGUAGE = "${sourceLanguage}";
     public static final String VAR_TARGET_LANGUAGE = "${targetLanguage}";
+    public static final String VAR_MATCH_SOURCE = "${matchSource}";
 
     private static final String[] MATCHES_VARIABLES = { VAR_ID, VAR_SOURCE_TEXT, VAR_DIFF, VAR_DIFF_REVERSED,
             VAR_TARGET_TEXT, VAR_SCORE_BASE, VAR_SCORE_NOSTEM, VAR_SCORE_ADJUSTED, VAR_FILE_NAME_ONLY,
             VAR_FILE_PATH, VAR_FILE_SHORT_PATH, VAR_INITIAL_CREATION_ID, VAR_INITIAL_CREATION_DATE,
-            VAR_CHANGED_ID, VAR_CHANGED_DATE, VAR_FUZZY_FLAG, VAR_SOURCE_LANGUAGE, VAR_TARGET_LANGUAGE };
+            VAR_CHANGED_ID, VAR_CHANGED_DATE, VAR_FUZZY_FLAG, VAR_SOURCE_LANGUAGE, VAR_TARGET_LANGUAGE,
+            VAR_MATCH_SOURCE };
 
     public static List<String> getMatchesVariables() {
         return Collections.unmodifiableList(Arrays.asList(MATCHES_VARIABLES));
     }
 
     public static final String DEFAULT_TEMPLATE = VAR_ID + ". " + VAR_FUZZY_FLAG + VAR_SOURCE_TEXT + "\n"
             + VAR_TARGET_TEXT + "\n" + "<" + VAR_SCORE_BASE + "/" + VAR_SCORE_NOSTEM + "/"
-            + VAR_SCORE_ADJUSTED + "% " + VAR_FILE_PATH + ">";
+            + VAR_SCORE_ADJUSTED + "%" + VAR_MATCH_SOURCE + VAR_FILE_PATH + ">";
 
     public static final Pattern PATTERN_SINGLE_PROPERTY = Pattern.compile("@\\{(.+?)\\}");
     public static final Pattern PATTERN_PROPERTY_GROUP = Pattern
@@ -222,6 +224,22 @@ private String getPropValue(List<TMXProp> props, String type) {
         return null;
     }
 
+    /**
+     * Replaces the {@code ${matchSource}} variable in the template with a
+     * localized label describing where the match comes from.
+     * <p>
+     * The TM and sub-segment labels are followed by a single space; the FILES
+     * and MEMORY labels are inserted without one.
+     *
+     * @param localTemplate
+     *            template text that may contain {@link #VAR_MATCH_SOURCE}.
+     * @param comesFrom
+     *            origin of the match; may be {@code null}, in which case the
+     *            variable is replaced with an empty string.
+     * @return the template with every occurrence of the variable replaced.
+     */
+    private String expandMatchSource(String localTemplate, NearString.MATCH_SOURCE comesFrom) {
+        // Switching on a null enum value throws NullPointerException, so a
+        // missing source is handled like an unknown one.
+        if (comesFrom == null) {
+            return localTemplate.replace(VAR_MATCH_SOURCE, "");
+        }
+        final String label;
+        switch (comesFrom) {
+        case TM:
+            label = OStrings.getString("MATCHES_COMES_FROM_TM") + " ";
+            break;
+        case FILES:
+            label = OStrings.getString("MATCHES_COMES_FROM_FILES");
+            break;
+        case MEMORY:
+            label = OStrings.getString("MATCHES_COMES_FROM_MEMORY");
+            break;
+        case TM_SUBSEG:
+            label = OStrings.getString("MATCHES_COMES_FROM_TM_SUBSEG") + " ";
+            break;
+        default:
+            label = "";
+            break;
+        }
+        return localTemplate.replace(VAR_MATCH_SOURCE, label);
+    }
+
     @Override
     public String expandVariables(NearString match) {
         // do not modify template directly, so that we can reuse for another
@@ -285,6 +303,9 @@ public String expandVariables(NearString match) {
         } else {
                 localTemplate = localTemplate.replace(VAR_TARGET_TEXT, match.translation);
         }
+
+        localTemplate = expandMatchSource(localTemplate, match.comesFrom);
+
         return localTemplate;
     }
 

diff --git a/test/src/org/omegat/core/statistics/FindMatchesTest.java b/test/src/org/omegat/core/statistics/FindMatchesTest.java
@@ -3,7 +3,7 @@
           with fuzzy matching, translation memory, keyword search,
           glossaries, and translation leveraging into updated projects.
 
- Copyright (C) 2021 Hiroshi Miura
+ Copyright (C) 2021-2024 Hiroshi Miura
                Home page: https://www.omegat.org/
                Support center: https://omegat.org/support
 

diff --git a/test/src/org/omegat/gui/matches/FindMatchesThreadTest.java b/test/src/org/omegat/gui/matches/FindMatchesThreadTest.java
diff --git a/test/src/org/omegat/gui/matches/MatchesVarExpansionTest.java b/test/src/org/omegat/gui/matches/MatchesVarExpansionTest.java
@@ -45,6 +45,7 @@
 import org.omegat.core.data.ProjectProperties;
 import org.omegat.core.data.SourceTextEntry;
 import org.omegat.core.matching.NearString;
+import org.omegat.core.matching.NearString.MATCH_SOURCE;
 import org.omegat.gui.editor.IEditor;
 import org.omegat.gui.editor.IEditorFilter;
 import org.omegat.gui.editor.IEditorSettings;
@@ -208,7 +209,7 @@ public NearString getMockNearString() {
         entry.changeDate = 20020523;
         entry.otherProperties = testProps;
         NearString.Scores scores = new NearString.Scores(20, 40, 60);
-        return new NearString(null, entry, null, false, scores, null, "mock testing project");
+        return new NearString(null, entry, MATCH_SOURCE.TM, false, scores, null, "mock testing project");
     };
 
     private void setupProject(Language sourceLanguage, Language targetLanguage) {