Improve emoji search results.

This commit is contained in:
Greyson Parrelli 2022-08-03 11:51:50 -04:00
parent 18eac51576
commit 907abf72d3
3 changed files with 109 additions and 85 deletions

View file

@ -1,84 +0,0 @@
package org.thoughtcrime.securesms.database;
import android.content.ContentValues;
import android.content.Context;
import android.database.Cursor;
import android.text.TextUtils;
import androidx.annotation.NonNull;
import org.signal.core.util.CursorUtil;
import org.signal.core.util.SqlUtil;
import org.thoughtcrime.securesms.database.model.EmojiSearchData;
import org.thoughtcrime.securesms.util.FtsUtil;
import java.util.LinkedList;
import java.util.List;
/**
* Contains all info necessary for full-text search of emoji tags.
*/
public class EmojiSearchDatabase extends Database {

  public static final String TABLE_NAME = "emoji_search";

  public static final String LABEL = "label";
  public static final String EMOJI = "emoji";

  // FTS5 virtual table: LABEL is the searchable column, EMOJI is stored but kept out of the index.
  public static final String CREATE_TABLE = "CREATE VIRTUAL TABLE " + TABLE_NAME + " USING fts5(" + LABEL + ", " + EMOJI + " UNINDEXED)";

  public EmojiSearchDatabase(@NonNull Context context, @NonNull SignalDatabase databaseHelper) {
    super(context, databaseHelper);
  }

  /**
   * Runs a prefix full-text match of the query against the emoji labels.
   *
   * @param query A search query. Doesn't need any special formatting -- it'll be sanitized.
   * @param limit The maximum number of rows to return.
   * @return A list of distinct emoji that are related to the search term, ordered by FTS rank.
   *         Empty if the sanitized query is empty.
   */
  public @NonNull List<String> query(@NonNull String query, int limit) {
    SQLiteDatabase db          = databaseHelper.getSignalReadableDatabase();
    String         matchString = FtsUtil.createPrefixMatchString(query);
    List<String>   results     = new LinkedList<>();

    // Nothing searchable is left after sanitizing, so skip the MATCH query entirely.
    if (TextUtils.isEmpty(matchString)) {
      return results;
    }

    String[] projection = new String[] { EMOJI };
    String   selection  = LABEL + " MATCH (?)";
    String[] args       = SqlUtil.buildArgs(matchString);

    // distinct = true: an emoji can match through several labels, but should only be returned once.
    try (Cursor cursor = db.query(true, TABLE_NAME, projection, selection, args, null, null, "rank", String.valueOf(limit))) {
      while (cursor.moveToNext()) {
        results.add(CursorUtil.requireString(cursor, EMOJI));
      }
    }

    return results;
  }

  /**
   * Deletes the content of the current search index and replaces it with the new one.
   * The delete and all inserts happen inside a single transaction, with one row written per
   * (tag, emoji) pair.
   */
  public void setSearchIndex(@NonNull List<EmojiSearchData> searchIndex) {
    // This method mutates the table, so ask for the writable handle rather than the readable one.
    SQLiteDatabase db = databaseHelper.getSignalWritableDatabase();

    db.beginTransaction();
    try {
      db.delete(TABLE_NAME, null, null);

      for (EmojiSearchData searchData : searchIndex) {
        for (String label : searchData.getTags()) {
          ContentValues values = new ContentValues(2);
          values.put(LABEL, label);
          values.put(EMOJI, searchData.getEmoji());
          db.insert(TABLE_NAME, null, values);
        }
      }

      db.setTransactionSuccessful();
    } finally {
      db.endTransaction();
    }
  }
}

View file

@ -0,0 +1,108 @@
package org.thoughtcrime.securesms.database
import android.content.Context
import android.text.TextUtils
import androidx.core.content.contentValuesOf
import org.signal.core.util.requireNonNullString
import org.signal.core.util.select
import org.signal.core.util.withinTransaction
import org.thoughtcrime.securesms.database.model.EmojiSearchData
import kotlin.math.max
import kotlin.math.roundToInt
/**
* Contains all info necessary for full-text search of emoji tags.
*/
class EmojiSearchDatabase(context: Context, databaseHelper: SignalDatabase) : Database(context, databaseHelper) {

  companion object {
    const val TABLE_NAME = "emoji_search"
    const val LABEL = "label"
    const val EMOJI = "emoji"

    // FTS5 virtual table: LABEL is the searchable column, EMOJI is stored but kept out of the index.
    const val CREATE_TABLE = "CREATE VIRTUAL TABLE $TABLE_NAME USING fts5($LABEL, $EMOJI UNINDEXED)"
  }

  /**
   * Finds emoji whose label contains the query and ranks them by how closely the label matches.
   *
   * @param originalQuery A search query. Doesn't need any special formatting -- it is trimmed before use.
   * @param originalLimit The maximum number of emoji to return.
   * @return A list of distinct emoji that are related to the search term, ordered by relevance
   *         (best match first). Empty if the trimmed query is empty.
   */
  fun query(originalQuery: String, originalLimit: Int): List<String> {
    val query: String = originalQuery.trim()

    if (TextUtils.isEmpty(query)) {
      return emptyList()
    }

    // Ranking happens in memory, so fetch at least 100 candidate rows even for small limits.
    // That gives the ranking below a reasonable pool to pick the best matches from.
    val limit: Int = max(originalLimit, 100)
    val entries = mutableListOf<Entry>()

    readableDatabase
      .select(LABEL, EMOJI)
      .from(TABLE_NAME)
      .where("$LABEL LIKE ?", "%$query%")
      .limit(limit)
      .run()
      .use { cursor ->
        while (cursor.moveToNext()) {
          entries += Entry(
            label = cursor.requireNonNullString(LABEL),
            emoji = cursor.requireNonNullString(EMOJI)
          )
        }
      }

    return entries
      // Score each row once up front instead of re-scoring inside every comparison.
      .map { entry -> entry to similarityScore(query, entry.label) }
      // sortedBy is stable and compares the scores directly, so there is no subtraction that could overflow.
      .sortedBy { it.second }
      .map { it.first.emoji }
      // An emoji can match through several labels; keep only its best-ranked occurrence so that
      // duplicates don't eat into the caller's limit.
      .distinct()
      .take(originalLimit)
  }

  /**
   * Deletes the content of the current search index and replaces it with the new one.
   * The delete and all inserts happen inside a single transaction, with one row written per
   * (tag, emoji) pair.
   */
  fun setSearchIndex(searchIndex: List<EmojiSearchData>) {
    // This method mutates the table, so ask for the writable handle rather than the readable one.
    val db = databaseHelper.signalWritableDatabase

    db.withinTransaction {
      db.delete(TABLE_NAME, null, null)

      for (searchData in searchIndex) {
        for (label in searchData.tags) {
          val values = contentValuesOf(
            LABEL to label,
            EMOJI to searchData.emoji
          )
          db.insert(TABLE_NAME, null, values)
        }
      }
    }
  }

  /**
   * Ranks how "similar" a match is to the original search term.
   * A lower score means more similar, with 0 being a perfect match.
   *
   * The `searchTerm` is normally a substring of the `match`, compared case-insensitively because
   * SQLite's `LIKE` ignores ASCII case. We determine similarity by how many letters appear before
   * or after the `searchTerm` in the `match`. Letters that come before the term get a bigger weight
   * than those that come after, as a way to prefer matches that are prefixed by the `searchTerm`.
   *
   * If the `searchTerm` cannot be located in the `match` at all (for example because the term
   * contained a `LIKE` wildcard such as `%` or `_`), the worst possible score is returned so the
   * row sorts last.
   */
  private fun similarityScore(searchTerm: String, match: String): Int {
    if (searchTerm.equals(match, ignoreCase = true)) {
      return 0
    }

    val startIndex = match.indexOf(searchTerm, ignoreCase = true)

    if (startIndex < 0) {
      return Int.MAX_VALUE
    }

    val prefixCount = startIndex
    val suffixCount = match.length - (startIndex + searchTerm.length)

    val prefixRankWeight = 1.5f
    val suffixRankWeight = 1f

    return ((prefixCount * prefixRankWeight) + (suffixCount * suffixRankWeight)).roundToInt()
  }

  /** A single (label, emoji) row read from the search table. */
  private data class Entry(val label: String, val emoji: String)
}

View file

@ -26,7 +26,7 @@ class EmojiSearchRepository(private val context: Context) {
fun submitQuery(query: String, limit: Int = EMOJI_SEARCH_LIMIT): Single<List<String>> {
val result = if (query.length >= MINIMUM_INLINE_QUERY_THRESHOLD && NOT_PUNCTUATION.matches(query.substring(query.lastIndex))) {
Single.fromCallable<List<String>> { emojiSearchDatabase.query(query, limit) }
Single.fromCallable { emojiSearchDatabase.query(query, limit) }
} else {
Single.just(emptyList())
}