Account for grapheme clusters when trimming to fit a specific length.
Fixes #10076
This commit is contained in:
parent
da4be5c1cf
commit
f06817f00d
4 changed files with 387 additions and 10 deletions
|
@ -0,0 +1,124 @@
|
|||
package org.thoughtcrime.securesms.util;
|
||||
|
||||
import android.os.Build;
|
||||
|
||||
import androidx.annotation.NonNull;
|
||||
import androidx.annotation.RequiresApi;
|
||||
|
||||
import java.util.Iterator;
|
||||
|
||||
/**
|
||||
* Iterates over a string treating a surrogate pair and a grapheme cluster a single character.
|
||||
*/
|
||||
public final class CharacterIterable implements Iterable<String> {
|
||||
|
||||
private final String string;
|
||||
|
||||
public CharacterIterable(@NonNull String string) {
|
||||
this.string = string;
|
||||
}
|
||||
|
||||
@Override
|
||||
public @NonNull Iterator<String> iterator() {
|
||||
return new CharacterIterator();
|
||||
}
|
||||
|
||||
private class CharacterIterator implements Iterator<String> {
|
||||
private static final int UNINITIALIZED = -2;
|
||||
|
||||
private final BreakIteratorCompat breakIterator;
|
||||
|
||||
private int lastIndex = UNINITIALIZED;
|
||||
|
||||
CharacterIterator() {
|
||||
this.breakIterator = Build.VERSION.SDK_INT >= 24 ? new AndroidIcuBreakIterator(string)
|
||||
: new FallbackBreakIterator(string);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
if (lastIndex == UNINITIALIZED) {
|
||||
lastIndex = breakIterator.first();
|
||||
}
|
||||
return !breakIterator.isDone(lastIndex);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String next() {
|
||||
int firstIndex = lastIndex;
|
||||
lastIndex = breakIterator.next();
|
||||
return string.substring(firstIndex, lastIndex);
|
||||
}
|
||||
}
|
||||
|
||||
private interface BreakIteratorCompat {
|
||||
int first();
|
||||
|
||||
int next();
|
||||
|
||||
boolean isDone(int index);
|
||||
}
|
||||
|
||||
/**
|
||||
* An BreakIteratorCompat implementation that delegates calls to `android.icu.text.BreakIterator`.
|
||||
* This class handles grapheme clusters fine but requires Android API >= 24.
|
||||
*/
|
||||
@RequiresApi(24)
|
||||
private static class AndroidIcuBreakIterator implements BreakIteratorCompat {
|
||||
private final android.icu.text.BreakIterator breakIterator = android.icu.text.BreakIterator.getCharacterInstance();
|
||||
|
||||
public AndroidIcuBreakIterator(@NonNull String string) {
|
||||
breakIterator.setText(string);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int first() {
|
||||
return breakIterator.first();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int next() {
|
||||
return breakIterator.next();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isDone(int index) {
|
||||
return index == android.icu.text.BreakIterator.DONE;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* An BreakIteratorCompat implementation that delegates calls to `java.text.BreakIterator`.
|
||||
* This class may or may not handle grapheme clusters well depending on the underlying implementation.
|
||||
* In the emulator, API 23 implements ICU version of the BreakIterator so that it handles grapheme
|
||||
* clusters fine. But API 21 implements RuleBasedIterator which does not handle grapheme clusters.
|
||||
* <p>
|
||||
* If it doesn't handle grapheme clusters correctly, in most cases the combined characters are
|
||||
* broken up into pieces when the code tries to trim a string. For example, an emoji that is
|
||||
* a combination of a person, gender and skin tone, trimming the character using this class may result
|
||||
* in trimming the parts of the character, e.g. a dark skin frowning woman emoji may result in
|
||||
* a neutral skin frowning woman emoji.
|
||||
*/
|
||||
private static class FallbackBreakIterator implements BreakIteratorCompat {
|
||||
private final java.text.BreakIterator breakIterator = java.text.BreakIterator.getCharacterInstance();
|
||||
|
||||
public FallbackBreakIterator(@NonNull String string) {
|
||||
breakIterator.setText(string);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int first() {
|
||||
return breakIterator.first();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int next() {
|
||||
return breakIterator.next();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isDone(int index) {
|
||||
return index == java.text.BreakIterator.DONE;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,9 +1,13 @@
|
|||
package org.thoughtcrime.securesms.util;
|
||||
|
||||
import android.text.TextUtils;
|
||||
|
||||
import androidx.annotation.NonNull;
|
||||
import androidx.annotation.Nullable;
|
||||
import androidx.core.text.BidiFormatter;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.Set;
|
||||
|
||||
|
@ -39,21 +43,33 @@ public final class StringUtil {
|
|||
|
||||
/**
|
||||
* Trims a name string to fit into the byte length requirement.
|
||||
* <p>
|
||||
* This method treats a surrogate pair and a grapheme cluster a single character
|
||||
* See examples in tests defined in StringUtilText_trimToFit.
|
||||
*/
|
||||
public static @NonNull String trimToFit(@Nullable String name, int maxLength) {
|
||||
if (name == null) return "";
|
||||
|
||||
// At least one byte per char, so shorten string to reduce loop
|
||||
if (name.length() > maxLength) {
|
||||
name = name.substring(0, maxLength);
|
||||
public static @NonNull String trimToFit(@Nullable String name, int maxByteLength) {
|
||||
if (TextUtils.isEmpty(name)) {
|
||||
return "";
|
||||
}
|
||||
|
||||
// Remove one char at a time until fits in byte allowance
|
||||
while (name.getBytes(StandardCharsets.UTF_8).length > maxLength) {
|
||||
name = name.substring(0, name.length() - 1);
|
||||
if (name.getBytes(StandardCharsets.UTF_8).length <= maxByteLength) {
|
||||
return name;
|
||||
}
|
||||
|
||||
return name;
|
||||
try (ByteArrayOutputStream stream = new ByteArrayOutputStream()) {
|
||||
for (String graphemeCharacter : new CharacterIterable(name)) {
|
||||
byte[] bytes = graphemeCharacter.getBytes(StandardCharsets.UTF_8);
|
||||
|
||||
if (stream.size() + bytes.length <= maxByteLength) {
|
||||
stream.write(bytes);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return stream.toString();
|
||||
} catch (IOException e) {
|
||||
throw new AssertionError(e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -1,6 +1,11 @@
|
|||
package org.thoughtcrime.securesms.profiles;
|
||||
|
||||
import android.app.Application;
|
||||
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
import org.robolectric.RobolectricTestRunner;
|
||||
import org.robolectric.annotation.Config;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertFalse;
|
||||
|
@ -8,6 +13,8 @@ import static org.junit.Assert.assertNotNull;
|
|||
import static org.junit.Assert.assertSame;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
@RunWith(RobolectricTestRunner.class)
|
||||
@Config(manifest = Config.NONE, application = Application.class)
|
||||
public final class ProfileNameTest {
|
||||
|
||||
@Test
|
||||
|
|
|
@ -0,0 +1,230 @@
|
|||
package org.thoughtcrime.securesms.util;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.fail;
|
||||
import static org.junit.Assume.assumeTrue;
|
||||
|
||||
import android.app.Application;
|
||||
|
||||
import android.os.Build;
|
||||
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
import org.robolectric.RobolectricTestRunner;
|
||||
import org.robolectric.annotation.Config;
|
||||
|
||||
@RunWith(RobolectricTestRunner.class)
|
||||
@Config(manifest = Config.NONE, application = Application.class)
|
||||
public final class StringUtilTest_trimToFit {
|
||||
|
||||
@Test
|
||||
public void testShortStringIsNotTrimmed() {
|
||||
assertEquals("Test string", StringUtil.trimToFit("Test string", 32));
|
||||
assertEquals("", StringUtil.trimToFit("", 32));
|
||||
assertEquals("aaaBBBCCC", StringUtil.trimToFit("aaaBBBCCC", 9));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNull() {
|
||||
assertEquals("", StringUtil.trimToFit(null, 0));
|
||||
assertEquals("", StringUtil.trimToFit(null, 1));
|
||||
assertEquals("", StringUtil.trimToFit(null, 10));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testStringIsTrimmed() {
|
||||
assertEquals("Test stri", StringUtil.trimToFit("Test string", 9));
|
||||
assertEquals("aaaBBBCC", StringUtil.trimToFit("aaaBBBCCC", 8));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testStringWithControlCharsIsTrimmed() {
|
||||
assertEquals("Test string\nwrap\r\nhere",
|
||||
StringUtil.trimToFit("Test string\nwrap\r\nhere\tindent\n\n", 22));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAccentedCharactersAreTrimmedCorrectly() {
|
||||
assertEquals("", StringUtil.trimToFit("âëȋõṷ", 1));
|
||||
assertEquals("â", StringUtil.trimToFit("âëȋõṷ", 2));
|
||||
assertEquals("â", StringUtil.trimToFit("âëȋõṷ", 3));
|
||||
assertEquals("âë", StringUtil.trimToFit("âëȋõṷ", 4));
|
||||
assertEquals("The last characters take more than a byte in utf8 â",
|
||||
StringUtil.trimToFit("The last characters take more than a byte in utf8 âëȋõṷ", 53));
|
||||
assertEquals("un quinzième jour en jaune apr", StringUtil.trimToFit("un quinzième jour en jaune après son épopée de 2019", 32));
|
||||
assertEquals("una vez se organizaron detrás l", StringUtil.trimToFit("una vez se organizaron detrás la ventaja nunca pasó de los 3 minutos.", 32));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCombinedAccentsAreTrimmedAsACharacter() {
|
||||
final String a = "a\u0302";
|
||||
final String e = "e\u0308";
|
||||
final String i = "i\u0311";
|
||||
final String o = "o\u0303";
|
||||
final String u = "u\u032d";
|
||||
assertEquals("", StringUtil.trimToFit(a + e + i + o + u, 1));
|
||||
assertEquals("", StringUtil.trimToFit(a + e + i + o + u, 2));
|
||||
assertEquals(a, StringUtil.trimToFit(a + e + i + o + u, 3));
|
||||
assertEquals(a, StringUtil.trimToFit(a + e + i + o + u, 4));
|
||||
assertEquals(a, StringUtil.trimToFit(a + e + i + o + u, 5));
|
||||
assertEquals(a + e, StringUtil.trimToFit(a + e + i + o + u, 6));
|
||||
assertEquals("The last characters take more than a byte in utf8 " + a,
|
||||
StringUtil.trimToFit("The last characters take more than a byte in utf8 " + a + e + i + o + u, 53));
|
||||
assertEquals("un quinzie\u0300me jour en jaune apr", StringUtil.trimToFit("un quinzie\u0300me jour en jaune apre\u0300s son e\u0301pope\u0301e de 2019", 32));
|
||||
assertEquals("una vez se organizaron detra\u0301s ", StringUtil.trimToFit("una vez se organizaron detra\u0301s la ventaja nunca paso\u0301 de los 3 minutos.", 32));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCJKCharactersAreTrimmedCorrectly() {
|
||||
final String shin = "\u4fe1";
|
||||
final String signal = shin + "\u53f7";
|
||||
final String _private = "\u79c1\u4eba";
|
||||
final String messenger = "\u4fe1\u4f7f";
|
||||
assertEquals("", StringUtil.trimToFit(signal, 1));
|
||||
assertEquals("", StringUtil.trimToFit(signal, 2));
|
||||
assertEquals(shin, StringUtil.trimToFit(signal, 3));
|
||||
assertEquals(shin, StringUtil.trimToFit(signal, 4));
|
||||
assertEquals(shin, StringUtil.trimToFit(signal, 5));
|
||||
assertEquals(signal, StringUtil.trimToFit(signal, 6));
|
||||
assertEquals(String.format("Signal %s Pr", signal),
|
||||
StringUtil.trimToFit(String.format("Signal %s Private %s Messenger %s", signal, _private, messenger),
|
||||
16));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSurrogatePairsAreTrimmedCorrectly() {
|
||||
final String sword = "\uD841\uDF4F";
|
||||
assertEquals("", StringUtil.trimToFit(sword, 1));
|
||||
assertEquals("", StringUtil.trimToFit(sword, 2));
|
||||
assertEquals("", StringUtil.trimToFit(sword, 3));
|
||||
assertEquals(sword, StringUtil.trimToFit(sword, 4));
|
||||
|
||||
final String so = "\ud869\uddf1";
|
||||
final String go = "\ud869\ude1a";
|
||||
assertEquals("", StringUtil.trimToFit(so + go, 1));
|
||||
assertEquals("", StringUtil.trimToFit(so + go, 2));
|
||||
assertEquals("", StringUtil.trimToFit(so + go, 3));
|
||||
assertEquals(so, StringUtil.trimToFit(so + go, 4));
|
||||
assertEquals(so, StringUtil.trimToFit(so + go, 5));
|
||||
assertEquals(so, StringUtil.trimToFit(so + go, 6));
|
||||
assertEquals(so, StringUtil.trimToFit(so + go, 7));
|
||||
assertEquals(so + go, StringUtil.trimToFit(so + go, 8));
|
||||
|
||||
final String gClef = "\uD834\uDD1E";
|
||||
final String fClef = "\uD834\uDD22";
|
||||
assertEquals("", StringUtil.trimToFit(gClef + " " + fClef, 1));
|
||||
assertEquals("", StringUtil.trimToFit(gClef + " " + fClef, 2));
|
||||
assertEquals("", StringUtil.trimToFit(gClef + " " + fClef, 3));
|
||||
assertEquals(gClef, StringUtil.trimToFit(gClef + " " + fClef, 4));
|
||||
assertEquals(gClef + " ", StringUtil.trimToFit(gClef + " " + fClef, 5));
|
||||
assertEquals(gClef + " ", StringUtil.trimToFit(gClef + " " + fClef, 6));
|
||||
assertEquals(gClef + " ", StringUtil.trimToFit(gClef + " " + fClef, 7));
|
||||
assertEquals(gClef + " ", StringUtil.trimToFit(gClef + " " + fClef, 8));
|
||||
assertEquals(gClef + " " + fClef, StringUtil.trimToFit(gClef + " " + fClef, 9));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSimpleEmojiTrimming() {
|
||||
final String congrats = "\u3297";
|
||||
assertEquals("", StringUtil.trimToFit(congrats, 1));
|
||||
assertEquals("", StringUtil.trimToFit(congrats, 2));
|
||||
assertEquals(congrats, StringUtil.trimToFit(congrats, 3));
|
||||
|
||||
final String eject = "\u23cf";
|
||||
assertEquals("", StringUtil.trimToFit(eject, 1));
|
||||
assertEquals("", StringUtil.trimToFit(eject, 2));
|
||||
assertEquals(eject, StringUtil.trimToFit(eject, 3));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testEmojisSurrogatePairTrimming() {
|
||||
final String grape = "🍇";
|
||||
assertEquals("", StringUtil.trimToFit(grape, 1));
|
||||
assertEquals("", StringUtil.trimToFit(grape, 2));
|
||||
assertEquals("", StringUtil.trimToFit(grape, 3));
|
||||
assertEquals(grape, StringUtil.trimToFit(grape, 4));
|
||||
|
||||
final String smile = "\uD83D\uDE42";
|
||||
assertEquals("", StringUtil.trimToFit(smile, 1));
|
||||
assertEquals("", StringUtil.trimToFit(smile, 2));
|
||||
assertEquals("", StringUtil.trimToFit(smile, 3));
|
||||
assertEquals(smile, StringUtil.trimToFit(smile, 4));
|
||||
|
||||
final String check = "\u2714"; // Simple emoji
|
||||
assertEquals(check, StringUtil.trimToFit(check, 3));
|
||||
final String secret = "\u3299"; // Simple emoji
|
||||
assertEquals(secret, StringUtil.trimToFit(secret, 3));
|
||||
final String phoneWithArrow = "\uD83D\uDCF2"; // Surrogate Pair emoji
|
||||
assertEquals(phoneWithArrow, StringUtil.trimToFit(phoneWithArrow, 4));
|
||||
|
||||
assertEquals(phoneWithArrow + ":",
|
||||
StringUtil.trimToFit(phoneWithArrow + ":" + secret + ", " + check, 7));
|
||||
assertEquals(phoneWithArrow + ":" + secret,
|
||||
StringUtil.trimToFit(phoneWithArrow + ":" + secret + ", " + check, 8));
|
||||
assertEquals(phoneWithArrow + ":" + secret + ",",
|
||||
StringUtil.trimToFit(phoneWithArrow + ":" + secret + ", " + check, 9));
|
||||
assertEquals(phoneWithArrow + ":" + secret + ", ",
|
||||
StringUtil.trimToFit(phoneWithArrow + ":" + secret + ", " + check, 10));
|
||||
assertEquals(phoneWithArrow + ":" + secret + ", ",
|
||||
StringUtil.trimToFit(phoneWithArrow + ":" + secret + ", " + check, 11));
|
||||
assertEquals(phoneWithArrow + ":" + secret + ", ",
|
||||
StringUtil.trimToFit(phoneWithArrow + ":" + secret + ", " + check, 12));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGraphemeClusterTrimming1() {
|
||||
assumeTrue(Build.VERSION.SDK_INT >= 24);
|
||||
|
||||
final String alphas = "AAAAABBBBBCCCCCDDDDDEEEEE";
|
||||
final String wavingHand = "\uD83D\uDC4B";
|
||||
final String mediumDark = "\uD83C\uDFFE";
|
||||
assertEquals(alphas, StringUtil.trimToFit(alphas + wavingHand + mediumDark, 32));
|
||||
assertEquals(alphas + wavingHand + mediumDark, StringUtil.trimToFit(alphas + wavingHand + mediumDark, 33));
|
||||
|
||||
final String pads = "abcdefghijklm";
|
||||
final String frowningPerson = "\uD83D\uDE4D";
|
||||
final String female = "\u200D\u2640\uFE0F";
|
||||
assertEquals(pads + frowningPerson + female,
|
||||
StringUtil.trimToFit(pads + frowningPerson + female, 26));
|
||||
assertEquals(pads + "n",
|
||||
StringUtil.trimToFit(pads + "n" + frowningPerson + female, 26));
|
||||
|
||||
final String pads1 = "abcdef";
|
||||
final String mediumSkin = "\uD83C\uDFFD️";
|
||||
assertEquals(pads1 + frowningPerson + mediumSkin + female,
|
||||
StringUtil.trimToFit(pads1 + frowningPerson + mediumSkin + female, 26));
|
||||
assertEquals(pads1 + "g",
|
||||
StringUtil.trimToFit(pads1 + "g" + frowningPerson + mediumSkin + female, 26));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGraphemeClusterTrimming2() {
|
||||
assumeTrue(Build.VERSION.SDK_INT >= 24);
|
||||
|
||||
final String woman = "\uD83D\uDC69";
|
||||
final String mediumDarkSkin = "\uD83C\uDFFE";
|
||||
final String joint = "\u200D";
|
||||
final String hands = "\uD83E\uDD1D";
|
||||
final String man = "\uD83D\uDC68";
|
||||
final String lightSkin = "\uD83C\uDFFB";
|
||||
|
||||
assertEquals(woman + mediumDarkSkin + joint + hands + joint + man + lightSkin,
|
||||
StringUtil.trimToFit(woman + mediumDarkSkin + joint + hands + joint + man + lightSkin, 26));
|
||||
assertEquals("a",
|
||||
StringUtil.trimToFit("a" + woman + mediumDarkSkin + joint + hands + joint + man + lightSkin, 26));
|
||||
|
||||
final String pads = "abcdefghijk";
|
||||
final String wheelchair = "\uD83E\uDDBC";
|
||||
assertEquals(pads + man + lightSkin + joint + wheelchair,
|
||||
StringUtil.trimToFit(pads + man + lightSkin + joint + wheelchair, 26));
|
||||
assertEquals(pads + "l",
|
||||
StringUtil.trimToFit(pads + "l" + man + lightSkin + joint + wheelchair, 26));
|
||||
|
||||
final String girl = "\uD83D\uDC67";
|
||||
final String boy = "\uD83D\uDC66";
|
||||
assertEquals(man + mediumDarkSkin + joint + man + joint + girl + lightSkin + joint + boy,
|
||||
StringUtil.trimToFit(man + mediumDarkSkin + joint + man + joint + girl + lightSkin + joint + boy, 33));
|
||||
assertEquals("a",
|
||||
StringUtil.trimToFit("a" + man + mediumDarkSkin + joint + man + joint + girl + lightSkin + joint + boy, 33));
|
||||
}
|
||||
}
|
Loading…
Add table
Reference in a new issue