Fix FTS searches for punctuation and emoji.

Fixes #13047
This commit is contained in:
Greyson Parrelli 2023-08-15 15:41:47 -04:00 committed by Cody Henthorne
parent f2237a385e
commit e33c5b055d
3 changed files with 24 additions and 87 deletions

View file

@ -255,6 +255,12 @@ class SearchTable(context: Context, databaseHelper: SignalDatabase) : DatabaseTa
Log.w(TAG, "[fullyResetTables] Done. Index will be rebuilt asynchronously)")
}
/**
* We want to turn the user's query into something that works well in a MATCH query.
* Most users expect some amount of fuzzy search, so what we do is break the string
* into tokens, escape each token (to allow the user to search for punctuation), and
* then append a * to the end of each token to turn it into a prefix query.
*/
private fun createFullTextSearchQuery(query: String): String {
return query
.split(" ")
@ -267,7 +273,12 @@ class SearchTable(context: Context, databaseHelper: SignalDatabase) : DatabaseTa
)
}
/**
* Wrapping a string in double quotes makes sqlite treat it as a string literal in a MATCH query.
* To keep a literal quote character inside that string, every " in the input is doubled to "".
*/
private fun fullTextSearchEscape(s: String): String {
return "\"${s.replace("\"", "\"\"")}\""
val quotesEscaped = s.replace("\"", "\"\"")
return "\"$quotesEscaped\""
}
}

View file

@ -12,8 +12,6 @@ import androidx.annotation.NonNull;
import androidx.annotation.Nullable;
import androidx.annotation.WorkerThread;
import com.annimon.stream.Stream;
import org.signal.core.util.CursorUtil;
import org.signal.core.util.StringUtil;
import org.signal.core.util.concurrent.LatestPrioritizedSerialExecutor;
@ -39,11 +37,11 @@ import org.thoughtcrime.securesms.database.model.databaseprotos.BodyRangeList;
import org.thoughtcrime.securesms.dependencies.ApplicationDependencies;
import org.thoughtcrime.securesms.recipients.Recipient;
import org.thoughtcrime.securesms.recipients.RecipientId;
import org.thoughtcrime.securesms.util.FtsUtil;
import org.thoughtcrime.securesms.util.Util;
import org.thoughtcrime.securesms.util.concurrent.SerialExecutor;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
@ -113,11 +111,10 @@ public class SearchRepository {
@WorkerThread
public @NonNull MessageSearchResult queryMessagesSync(@NonNull String query) {
long start = System.currentTimeMillis();
String cleanQuery = FtsUtil.sanitize(query);
long start = System.currentTimeMillis();
List<MessageResult> messages = queryMessages(cleanQuery);
List<MessageResult> mentionMessages = queryMentions(sanitizeQueryAsTokens(query));
List<MessageResult> messages = queryMessages(query);
List<MessageResult> mentionMessages = queryMentions(convertMentionsQueryToTokens(query));
List<MessageResult> combined = mergeMessagesAndMentions(messages, mentionMessages);
Log.d(TAG, "[messages] Search took " + (System.currentTimeMillis() - start) + " ms");
@ -133,8 +130,8 @@ public class SearchRepository {
serialExecutor.execute(() -> {
long startTime = System.currentTimeMillis();
List<MessageResult> messages = queryMessages(FtsUtil.sanitize(query), threadId);
List<MessageResult> mentionMessages = queryMentions(sanitizeQueryAsTokens(query), threadId);
List<MessageResult> messages = queryMessages(query, threadId);
List<MessageResult> mentionMessages = queryMentions(convertMentionsQueryToTokens(query), threadId);
Log.d(TAG, "[ConversationQuery] " + (System.currentTimeMillis() - startTime) + " ms");
@ -375,10 +372,10 @@ public class SearchRepository {
return results;
}
private @NonNull List<MessageResult> queryMentions(@NonNull List<String> cleanQueries, long threadId) {
private @NonNull List<MessageResult> queryMentions(@NonNull List<String> queries, long threadId) {
Set<RecipientId> recipientIds = new HashSet<>();
for (String cleanQuery : cleanQueries) {
for (Recipient recipient : recipientTable.queryRecipientsForMentions(cleanQuery)) {
for (String query : queries) {
for (Recipient recipient : recipientTable.queryRecipientsForMentions(query)) {
recipientIds.add(recipient.getId());
}
}
@ -442,13 +439,13 @@ public class SearchRepository {
return list;
}
private @NonNull List<String> sanitizeQueryAsTokens(@NonNull String query) {
private @NonNull List<String> convertMentionsQueryToTokens(@NonNull String query) {
String[] parts = query.split("\\s+");
if (parts.length > 3) {
return Collections.emptyList();
} else {
return Arrays.asList(parts);
}
return Stream.of(parts).map(FtsUtil::sanitize).toList();
}
private static @NonNull List<MessageResult> mergeMessagesAndMentions(@NonNull List<MessageResult> messages, @NonNull List<MessageResult> mentionMessages) {

View file

@ -1,71 +0,0 @@
package org.thoughtcrime.securesms.util;
import android.database.DatabaseUtils;
import androidx.annotation.NonNull;
import com.annimon.stream.Stream;
import java.util.HashSet;
import java.util.Set;
/**
 * Static helpers for turning free-form user input into text that can be placed in a
 * full-text-search MATCH expression.
 */
public final class FtsUtil {

  /** Characters that {@link #sanitize(String)} removes (or, for the apostrophe, replaces). */
  private static final Set<Character> BANNED_CHARACTERS = new HashSet<>();

  static {
    // Inclusive [first, last] ASCII code ranges: '!'..'/', ':'..'@', '['..'`', '{'..'~'.
    final int[][] bannedRanges = { { 33, 47 }, { 58, 64 }, { 91, 96 }, { 123, 126 } };

    for (int[] range : bannedRanges) {
      for (int code = range[0]; code <= range[1]; code++) {
        BANNED_CHARACTERS.add((char) code);
      }
    }
  }

  /** Not instantiable; every member is static. */
  private FtsUtil() {}

  /**
   * Strips the banned ASCII punctuation from {@code query}.
   *
   * {@link DatabaseUtils#sqlEscapeString(String)} is not enough here, because MATCH queries use
   * their own syntax in which most "special" characters are not allowed.
   *
   * The apostrophe is the one exception to plain removal: it is swapped for a space, so that a
   * word such as "I'm" is searched as two separate tokens and can still be found.
   *
   * @param query the raw text typed by the user
   * @return {@code query} with every banned character dropped and every apostrophe turned into a space
   */
  public static @NonNull String sanitize(@NonNull String query) {
    StringBuilder cleaned = new StringBuilder();

    for (char c : query.toCharArray()) {
      if (c == '\'') {
        // The apostrophe is itself in the banned set; it is the only banned character that leaves a trace.
        cleaned.append(' ');
      } else if (!BANNED_CHARACTERS.contains(c)) {
        cleaned.append(c);
      }
    }

    return cleaned.toString();
  }

  /**
   * Runs {@link #sanitize(String)} on {@code query}, splits the result on single spaces, and turns
   * each non-empty token into a quoted prefix term.
   *
   * @param query the raw text typed by the user
   * @return the tokens, each wrapped in double quotes and followed by {@code "* "}; the empty
   *         string when no token survives sanitizing
   */
  public static @NonNull String createPrefixMatchString(@NonNull String query) {
    StringBuilder prefixQuery = new StringBuilder();

    for (String piece : sanitize(query).split(" ")) {
      String token = piece.trim();

      if (token.isEmpty()) {
        continue;
      }

      prefixQuery.append(fixQuotes(token)).append("* ");
    }

    return prefixQuery.toString();
  }

  /** Wraps {@code s} in double quotes, doubling any double quote already inside it. */
  private static String fixQuotes(String s) {
    String doubled = s.replace("\"", "\"\"");
    return "\"" + doubled + "\"";
  }
}