Improve emoji search rankings.

This commit is contained in:
Greyson Parrelli 2022-12-21 09:26:30 -05:00
parent 91fbc236ce
commit eada1e96ee
6 changed files with 123 additions and 15 deletions

View file

@ -3,12 +3,13 @@ package org.thoughtcrime.securesms.database
import android.content.Context
import android.text.TextUtils
import androidx.core.content.contentValuesOf
import org.signal.core.util.readToSingleInt
import org.signal.core.util.requireInt
import org.signal.core.util.requireNonNullString
import org.signal.core.util.select
import org.signal.core.util.withinTransaction
import org.thoughtcrime.securesms.database.model.EmojiSearchData
import kotlin.math.max
import kotlin.math.roundToInt
/**
* Contains all info necessary for full-text search of emoji tags.
@ -17,9 +18,24 @@ class EmojiSearchTable(context: Context, databaseHelper: SignalDatabase) : Datab
companion object {
const val TABLE_NAME = "emoji_search"
const val ID = "_id"
const val LABEL = "label"
const val EMOJI = "emoji"
const val CREATE_TABLE = "CREATE VIRTUAL TABLE $TABLE_NAME USING fts5($LABEL, $EMOJI UNINDEXED)"
// Column holding an emoji's rank. The schema below defaults it to Int.MAX_VALUE when no rank is written.
const val RANK = "rank"
// Schema of the table: an integer primary key, non-null label and emoji text, and the rank column.
//language=sql
const val CREATE_TABLE = """
CREATE TABLE $TABLE_NAME (
$ID INTEGER PRIMARY KEY,
$LABEL TEXT NOT NULL,
$EMOJI TEXT NOT NULL,
$RANK INTEGER DEFAULT ${Int.MAX_VALUE}
)
"""
// Index over (rank, label, emoji). NOTE(review): the name says "covering", presumably so that searches
// ordered by rank can be answered from the index alone -- confirm with EXPLAIN QUERY PLAN.
val CREATE_INDEXES = arrayOf(
"CREATE INDEX emoji_search_rank_covering ON $TABLE_NAME ($RANK, $LABEL, $EMOJI)"
)
}
/**
@ -33,27 +49,41 @@ class EmojiSearchTable(context: Context, databaseHelper: SignalDatabase) : Datab
return emptyList()
}
val limit: Int = max(originalLimit, 100)
val limit: Int = max(originalLimit, 200)
val entries = mutableListOf<Entry>()
val maxRank = readableDatabase
.select("MAX($RANK) AS max")
.from(TABLE_NAME)
.where("$RANK != ${Int.MAX_VALUE}")
.run()
.readToSingleInt()
readableDatabase
.select(LABEL, EMOJI)
.select(LABEL, EMOJI, RANK)
.from(TABLE_NAME)
.where("$LABEL LIKE ?", "%$query%")
.orderBy("$RANK ASC")
.limit(limit)
.run()
.use { cursor ->
while (cursor.moveToNext()) {
entries += Entry(
label = cursor.requireNonNullString(LABEL),
emoji = cursor.requireNonNullString(EMOJI)
emoji = cursor.requireNonNullString(EMOJI),
rank = cursor.requireInt(RANK)
)
}
}
return entries
.sortedWith { lhs, rhs ->
similarityScore(query, lhs.label) - similarityScore(query, rhs.label)
val result = similarityScore(query, lhs, maxRank) - similarityScore(query, rhs, maxRank)
when {
result < 0 -> -1
result > 0 -> 1
else -> 0
}
}
.distinctBy { it.emoji }
.take(originalLimit)
@ -73,7 +103,8 @@ class EmojiSearchTable(context: Context, databaseHelper: SignalDatabase) : Datab
for (label in searchData.tags) {
val values = contentValuesOf(
LABEL to label,
EMOJI to searchData.emoji
EMOJI to searchData.emoji,
RANK to if (searchData.rank == 0) Int.MAX_VALUE else searchData.rank
)
db.insert(TABLE_NAME, null, values)
}
@ -89,9 +120,11 @@ class EmojiSearchTable(context: Context, databaseHelper: SignalDatabase) : Datab
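* We determine similarity by how many letters appear before or after the `searchTerm` in the `match`.
* We give letters that come before the term a bigger weight than those that come after as a way to prefer matches that are prefixed by the `searchTerm`.
* Any match that is not exactly equal to the `searchTerm` also receives a fixed penalty, and every score includes the entry's scaled rank. Lower scores are sorted first.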
*/
private fun similarityScore(searchTerm: String, match: String): Int {
private fun similarityScore(searchTerm: String, entry: Entry, maxRank: Int): Float {
val match: String = entry.label
if (searchTerm == match) {
return 0
return entry.scaledRank(maxRank)
}
val startIndex = match.indexOf(searchTerm)
@ -99,11 +132,25 @@ class EmojiSearchTable(context: Context, databaseHelper: SignalDatabase) : Datab
val prefixCount = startIndex
val suffixCount = match.length - (startIndex + searchTerm.length)
val prefixRankWeight = 1.5f
val suffixRankWeight = 1f
val prefixRankWeight = 1.75f
val suffixRankWeight = 0.75f
val notExactMatchPenalty = 2f
return ((prefixCount * prefixRankWeight) + (suffixCount * suffixRankWeight)).roundToInt()
return notExactMatchPenalty +
(prefixCount * prefixRankWeight) +
(suffixCount * suffixRankWeight) +
entry.scaledRank(maxRank)
}
private data class Entry(val label: String, val emoji: String)
/**
 * A single row read from the emoji search table.
 *
 * @property label the tag text stored for the emoji.
 * @property emoji the emoji the label belongs to.
 * @property rank the stored rank; [Int.MAX_VALUE] marks an entry that has no rank.
 */
private data class Entry(val label: String, val emoji: String, val rank: Int) {

  /**
   * Scales [rank] linearly so that a rank equal to [maxRank] maps to the same value an unranked
   * entry receives (2), and smaller ranks map to proportionally smaller values.
   *
   * @param maxRank the largest rank to scale against.
   * @return the scaled rank, or the unranked value when this entry has no rank or when [maxRank]
   * is not positive.
   */
  fun scaledRank(maxRank: Int): Float {
    val unranked = 2f

    // A non-positive maxRank gives us nothing to scale against. Dividing by it would produce
    // Infinity (or NaN for a rank of 0), which cannot be ordered reliably, so treat the entry as unranked.
    if (rank == Int.MAX_VALUE || maxRank <= 0) {
      return unranked
    }

    val scaleFactor: Float = unranked / maxRank
    return rank * scaleFactor
  }
}
}

View file

@ -24,6 +24,7 @@ import org.thoughtcrime.securesms.database.helpers.migration.V165_MmsMessageBoxP
import org.thoughtcrime.securesms.database.helpers.migration.V166_ThreadAndMessageForeignKeys
import org.thoughtcrime.securesms.database.helpers.migration.V167_RecreateReactionTriggers
import org.thoughtcrime.securesms.database.helpers.migration.V168_SingleMessageTableMigration
import org.thoughtcrime.securesms.database.helpers.migration.V169_EmojiSearchIndexRank
/**
* Contains all of the database migrations for [SignalDatabase]. Broken into a separate file for cleanliness.
@ -32,7 +33,7 @@ object SignalDatabaseMigrations {
val TAG: String = Log.tag(SignalDatabaseMigrations.javaClass)
const val DATABASE_VERSION = 168
const val DATABASE_VERSION = 169
@JvmStatic
fun migrate(context: Application, db: SQLiteDatabase, oldVersion: Int, newVersion: Int) {
@ -115,6 +116,10 @@ object SignalDatabaseMigrations {
if (oldVersion < 168) {
V168_SingleMessageTableMigration.migrate(context, db, oldVersion, newVersion)
}
if (oldVersion < 169) {
V169_EmojiSearchIndexRank.migrate(context, db, oldVersion, newVersion)
}
}
@JvmStatic

View file

@ -0,0 +1,27 @@
package org.thoughtcrime.securesms.database.helpers.migration
import android.app.Application
import net.zetetic.database.sqlcipher.SQLiteDatabase
/**
* We want to add a new `rank` column to the emoji_search table, and we no longer use it as an FTS
* table, so we can get rid of that too.
*
* The table is rebuilt rather than altered in place: a regular table with the new schema is created,
* the existing `label`/`emoji` rows are copied into it, the old table is dropped, and the new table
* takes over the `emoji_search` name.
*/
object V169_EmojiSearchIndexRank : SignalDatabaseMigration {
override fun migrate(context: Application, db: SQLiteDatabase, oldVersion: Int, newVersion: Int) {
// 1. Create a regular table with the new schema. Rows written without a rank default to Int.MAX_VALUE.
db.execSQL(
"""
CREATE TABLE emoji_search_tmp (
_id INTEGER PRIMARY KEY,
label TEXT NOT NULL,
emoji TEXT NOT NULL,
rank INTEGER DEFAULT ${Int.MAX_VALUE}
)
"""
)
// 2. Copy the existing rows. Only label and emoji are copied, so every migrated row gets the default rank.
db.execSQL("INSERT INTO emoji_search_tmp (label, emoji) SELECT label, emoji from emoji_search")
// 3. Replace the old table with the new one under the original name.
db.execSQL("DROP TABLE emoji_search")
db.execSQL("ALTER TABLE emoji_search_tmp RENAME TO emoji_search")
// 4. Add the index over (rank, label, emoji) on the renamed table.
db.execSQL("CREATE INDEX emoji_search_rank_covering ON emoji_search (rank, label, emoji)")
}
}

View file

@ -1,6 +1,7 @@
package org.thoughtcrime.securesms.database.model;
import androidx.annotation.NonNull;
import androidx.annotation.Nullable;
import com.fasterxml.jackson.annotation.JsonProperty;
@ -16,6 +17,12 @@ public final class EmojiSearchData {
@JsonProperty
private List<String> tags;
@JsonProperty
private String shortName;
@JsonProperty
private int rank;
public EmojiSearchData() {}
public @NonNull String getEmoji() {
@ -25,4 +32,15 @@ public final class EmojiSearchData {
public @NonNull List<String> getTags() {
return tags;
}
/**
 * @return the value held in the {@code shortName} JSON property; may be {@code null}.
 */
public @Nullable String getShortName() {
  return this.shortName;
}
/**
 * Returns how popular this emoji is. Rank 1 is the best rank; a value of 0 means the emoji has no rank at all.
 */
public int getRank() {
  return this.rank;
}
}

View file

@ -16,7 +16,7 @@ import java.util.function.Consumer
private const val MINIMUM_QUERY_THRESHOLD = 1
private const val MINIMUM_INLINE_QUERY_THRESHOLD = 2
private const val EMOJI_SEARCH_LIMIT = 20
private const val EMOJI_SEARCH_LIMIT = 50
private val NOT_PUNCTUATION = "[^\\p{Punct}]".toRegex()

View file

@ -78,6 +78,17 @@ fun Cursor.readToSingleLong(defaultValue: Long = 0): Long {
}
}
/**
 * Reads column 0 of the first row as an [Int]. The cursor is closed before returning.
 *
 * @param defaultValue the value to return when the cursor has no first row.
 * @return the int in column 0 of the first row, or [defaultValue] if the cursor is empty.
 */
@JvmOverloads
fun Cursor.readToSingleInt(defaultValue: Int = 0): Int = use { cursor ->
  when {
    cursor.moveToFirst() -> cursor.getInt(0)
    else -> defaultValue
  }
}
@JvmOverloads
inline fun <T> Cursor.readToList(predicate: (T) -> Boolean = { true }, mapper: (Cursor) -> T): List<T> {
val list = mutableListOf<T>()