Improve emoji search rankings.
This commit is contained in:
parent
91fbc236ce
commit
eada1e96ee
6 changed files with 123 additions and 15 deletions
|
@ -3,12 +3,13 @@ package org.thoughtcrime.securesms.database
|
|||
import android.content.Context
|
||||
import android.text.TextUtils
|
||||
import androidx.core.content.contentValuesOf
|
||||
import org.signal.core.util.readToSingleInt
|
||||
import org.signal.core.util.requireInt
|
||||
import org.signal.core.util.requireNonNullString
|
||||
import org.signal.core.util.select
|
||||
import org.signal.core.util.withinTransaction
|
||||
import org.thoughtcrime.securesms.database.model.EmojiSearchData
|
||||
import kotlin.math.max
|
||||
import kotlin.math.roundToInt
|
||||
|
||||
/**
|
||||
* Contains all info necessary for full-text search of emoji tags.
|
||||
|
@ -17,9 +18,24 @@ class EmojiSearchTable(context: Context, databaseHelper: SignalDatabase) : Datab
|
|||
|
||||
companion object {
|
||||
const val TABLE_NAME = "emoji_search"
|
||||
const val ID = "_id"
|
||||
const val LABEL = "label"
|
||||
const val EMOJI = "emoji"
|
||||
const val CREATE_TABLE = "CREATE VIRTUAL TABLE $TABLE_NAME USING fts5($LABEL, $EMOJI UNINDEXED)"
|
||||
const val RANK = "rank"
|
||||
|
||||
//language=sql
|
||||
const val CREATE_TABLE = """
|
||||
CREATE TABLE $TABLE_NAME (
|
||||
$ID INTEGER PRIMARY KEY,
|
||||
$LABEL TEXT NOT NULL,
|
||||
$EMOJI TEXT NOT NULL,
|
||||
$RANK INTEGER DEFAULT ${Int.MAX_VALUE}
|
||||
)
|
||||
"""
|
||||
|
||||
val CREATE_INDEXES = arrayOf(
|
||||
"CREATE INDEX emoji_search_rank_covering ON $TABLE_NAME ($RANK, $LABEL, $EMOJI)"
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -33,27 +49,41 @@ class EmojiSearchTable(context: Context, databaseHelper: SignalDatabase) : Datab
|
|||
return emptyList()
|
||||
}
|
||||
|
||||
val limit: Int = max(originalLimit, 100)
|
||||
val limit: Int = max(originalLimit, 200)
|
||||
val entries = mutableListOf<Entry>()
|
||||
|
||||
val maxRank = readableDatabase
|
||||
.select("MAX($RANK) AS max")
|
||||
.from(TABLE_NAME)
|
||||
.where("$RANK != ${Int.MAX_VALUE}")
|
||||
.run()
|
||||
.readToSingleInt()
|
||||
|
||||
readableDatabase
|
||||
.select(LABEL, EMOJI)
|
||||
.select(LABEL, EMOJI, RANK)
|
||||
.from(TABLE_NAME)
|
||||
.where("$LABEL LIKE ?", "%$query%")
|
||||
.orderBy("$RANK ASC")
|
||||
.limit(limit)
|
||||
.run()
|
||||
.use { cursor ->
|
||||
while (cursor.moveToNext()) {
|
||||
entries += Entry(
|
||||
label = cursor.requireNonNullString(LABEL),
|
||||
emoji = cursor.requireNonNullString(EMOJI)
|
||||
emoji = cursor.requireNonNullString(EMOJI),
|
||||
rank = cursor.requireInt(RANK)
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
return entries
|
||||
.sortedWith { lhs, rhs ->
|
||||
similarityScore(query, lhs.label) - similarityScore(query, rhs.label)
|
||||
val result = similarityScore(query, lhs, maxRank) - similarityScore(query, rhs, maxRank)
|
||||
when {
|
||||
result < 0 -> -1
|
||||
result > 0 -> 1
|
||||
else -> 0
|
||||
}
|
||||
}
|
||||
.distinctBy { it.emoji }
|
||||
.take(originalLimit)
|
||||
|
@ -73,7 +103,8 @@ class EmojiSearchTable(context: Context, databaseHelper: SignalDatabase) : Datab
|
|||
for (label in searchData.tags) {
|
||||
val values = contentValuesOf(
|
||||
LABEL to label,
|
||||
EMOJI to searchData.emoji
|
||||
EMOJI to searchData.emoji,
|
||||
RANK to if (searchData.rank == 0) Int.MAX_VALUE else searchData.rank
|
||||
)
|
||||
db.insert(TABLE_NAME, null, values)
|
||||
}
|
||||
|
@ -89,9 +120,11 @@ class EmojiSearchTable(context: Context, databaseHelper: SignalDatabase) : Datab
|
|||
* We determine similarity by how many letters appear before or after the `searchTerm` in the `match`.
|
||||
* We give letters that come before the term a bigger weight than those that come after as a way to prefer matches that are prefixed by the `searchTerm`.
|
||||
*/
|
||||
private fun similarityScore(searchTerm: String, match: String): Int {
|
||||
private fun similarityScore(searchTerm: String, entry: Entry, maxRank: Int): Float {
|
||||
val match: String = entry.label
|
||||
|
||||
if (searchTerm == match) {
|
||||
return 0
|
||||
return entry.scaledRank(maxRank)
|
||||
}
|
||||
|
||||
val startIndex = match.indexOf(searchTerm)
|
||||
|
@ -99,11 +132,25 @@ class EmojiSearchTable(context: Context, databaseHelper: SignalDatabase) : Datab
|
|||
val prefixCount = startIndex
|
||||
val suffixCount = match.length - (startIndex + searchTerm.length)
|
||||
|
||||
val prefixRankWeight = 1.5f
|
||||
val suffixRankWeight = 1f
|
||||
val prefixRankWeight = 1.75f
|
||||
val suffixRankWeight = 0.75f
|
||||
val notExactMatchPenalty = 2f
|
||||
|
||||
return ((prefixCount * prefixRankWeight) + (suffixCount * suffixRankWeight)).roundToInt()
|
||||
return notExactMatchPenalty +
|
||||
(prefixCount * prefixRankWeight) +
|
||||
(suffixCount * suffixRankWeight) +
|
||||
entry.scaledRank(maxRank)
|
||||
}
|
||||
|
||||
private data class Entry(val label: String, val emoji: String)
|
||||
/**
 * A single search hit: a tag [label], the [emoji] it describes, and that emoji's popularity [rank].
 *
 * A [rank] of [Int.MAX_VALUE] means the entry has no rank.
 */
private data class Entry(val label: String, val emoji: String, val rank: Int) {

  /**
   * Scales [rank] linearly so that a rank equal to [maxRank] maps to the same value an unranked entry gets.
   * Smaller results are better.
   *
   * @param maxRank the largest rank among ranked entries, used as the scaling denominator.
   * @return the unranked value when this entry has no rank or [maxRank] is not positive, otherwise `rank * (unranked / maxRank)`.
   */
  fun scaledRank(maxRank: Int): Float {
    val unranked = 2f

    // A non-positive maxRank (presumably what the caller sees when no row is ranked) would make the
    // division produce Infinity/NaN, and NaN scores break the ordering of the sort comparator.
    if (rank == Int.MAX_VALUE || maxRank <= 0) {
      return unranked
    }

    val scaleFactor: Float = unranked / maxRank
    return rank * scaleFactor
  }
}
|
||||
}
|
||||
|
|
|
@ -24,6 +24,7 @@ import org.thoughtcrime.securesms.database.helpers.migration.V165_MmsMessageBoxP
|
|||
import org.thoughtcrime.securesms.database.helpers.migration.V166_ThreadAndMessageForeignKeys
|
||||
import org.thoughtcrime.securesms.database.helpers.migration.V167_RecreateReactionTriggers
|
||||
import org.thoughtcrime.securesms.database.helpers.migration.V168_SingleMessageTableMigration
|
||||
import org.thoughtcrime.securesms.database.helpers.migration.V169_EmojiSearchIndexRank
|
||||
|
||||
/**
|
||||
* Contains all of the database migrations for [SignalDatabase]. Broken into a separate file for cleanliness.
|
||||
|
@ -32,7 +33,7 @@ object SignalDatabaseMigrations {
|
|||
|
||||
val TAG: String = Log.tag(SignalDatabaseMigrations.javaClass)
|
||||
|
||||
const val DATABASE_VERSION = 168
|
||||
const val DATABASE_VERSION = 169
|
||||
|
||||
@JvmStatic
|
||||
fun migrate(context: Application, db: SQLiteDatabase, oldVersion: Int, newVersion: Int) {
|
||||
|
@ -115,6 +116,10 @@ object SignalDatabaseMigrations {
|
|||
if (oldVersion < 168) {
|
||||
V168_SingleMessageTableMigration.migrate(context, db, oldVersion, newVersion)
|
||||
}
|
||||
|
||||
if (oldVersion < 169) {
|
||||
V169_EmojiSearchIndexRank.migrate(context, db, oldVersion, newVersion)
|
||||
}
|
||||
}
|
||||
|
||||
@JvmStatic
|
||||
|
|
|
@ -0,0 +1,27 @@
|
|||
package org.thoughtcrime.securesms.database.helpers.migration
|
||||
|
||||
import android.app.Application
|
||||
import net.zetetic.database.sqlcipher.SQLiteDatabase
|
||||
|
||||
/**
 * Rebuilds `emoji_search` as a regular table that carries a new `rank` column. The table is no
 * longer used as an FTS table, so the old one is dropped and replaced instead of being altered.
 */
object V169_EmojiSearchIndexRank : SignalDatabaseMigration {
  override fun migrate(context: Application, db: SQLiteDatabase, oldVersion: Int, newVersion: Int) {
    val createReplacementTable =
      """
      CREATE TABLE emoji_search_tmp (
        _id INTEGER PRIMARY KEY,
        label TEXT NOT NULL,
        emoji TEXT NOT NULL,
        rank INTEGER DEFAULT ${Int.MAX_VALUE}
      )
      """

    // Order matters: build the replacement, copy rows, swap it in, then index it.
    // Only label and emoji are copied, so every migrated row takes the column's default rank.
    val statements = listOf(
      createReplacementTable,
      "INSERT INTO emoji_search_tmp (label, emoji) SELECT label, emoji from emoji_search",
      "DROP TABLE emoji_search",
      "ALTER TABLE emoji_search_tmp RENAME TO emoji_search",
      "CREATE INDEX emoji_search_rank_covering ON emoji_search (rank, label, emoji)"
    )

    statements.forEach { sql -> db.execSQL(sql) }
  }
}
|
|
@ -1,6 +1,7 @@
|
|||
package org.thoughtcrime.securesms.database.model;
|
||||
|
||||
import androidx.annotation.NonNull;
|
||||
import androidx.annotation.Nullable;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
|
||||
|
@ -16,6 +17,12 @@ public final class EmojiSearchData {
|
|||
@JsonProperty
|
||||
private List<String> tags;
|
||||
|
||||
@JsonProperty
|
||||
private String shortName;
|
||||
|
||||
@JsonProperty
|
||||
private int rank;
|
||||
|
||||
public EmojiSearchData() {}
|
||||
|
||||
public @NonNull String getEmoji() {
|
||||
|
@ -25,4 +32,15 @@ public final class EmojiSearchData {
|
|||
public @NonNull List<String> getTags() {
|
||||
return tags;
|
||||
}
|
||||
|
||||
public @Nullable String getShortName() {
|
||||
return shortName;
|
||||
}
|
||||
|
||||
/**
|
||||
* A value representing how popular an emoji is, with 1 being the best rank. A value of 0 means this emoji has no rank at all.
|
||||
*/
|
||||
public int getRank() {
|
||||
return rank;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -16,7 +16,7 @@ import java.util.function.Consumer
|
|||
|
||||
private const val MINIMUM_QUERY_THRESHOLD = 1
|
||||
private const val MINIMUM_INLINE_QUERY_THRESHOLD = 2
|
||||
private const val EMOJI_SEARCH_LIMIT = 20
|
||||
private const val EMOJI_SEARCH_LIMIT = 50
|
||||
|
||||
private val NOT_PUNCTUATION = "[^\\p{Punct}]".toRegex()
|
||||
|
||||
|
|
|
@ -78,6 +78,17 @@ fun Cursor.readToSingleLong(defaultValue: Long = 0): Long {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Reads column 0 of the first row as an Int and closes the cursor.
 *
 * @param defaultValue returned when the cursor has no rows.
 */
@JvmOverloads
fun Cursor.readToSingleInt(defaultValue: Int = 0): Int = use { cursor ->
  when {
    cursor.moveToFirst() -> cursor.getInt(0)
    else -> defaultValue
  }
}
|
||||
|
||||
@JvmOverloads
|
||||
inline fun <T> Cursor.readToList(predicate: (T) -> Boolean = { true }, mapper: (Cursor) -> T): List<T> {
|
||||
val list = mutableListOf<T>()
|
||||
|
|
Loading…
Add table
Reference in a new issue