Improve digest backfill migration.

Greyson Parrelli 2024-09-06 16:15:08 -04:00 committed by Cody Henthorne
parent 1e8626647e
commit c4bcb7dc93
4 changed files with 15 additions and 59 deletions

AttachmentTable.kt

@@ -1252,7 +1252,7 @@ class AttachmentTable(
   /**
    * A query for a specific migration. Retrieves attachments that we'd need to create a new digest for.
-   * These are attachments that have finished downloading and have data to create a digest from.
+   * This is basically all attachments that have data and are finished downloading.
    */
  fun getAttachmentsThatNeedNewDigests(): List<AttachmentId> {
    return readableDatabase
@@ -1260,33 +1260,14 @@
      .from(TABLE_NAME)
      .where(
        """
-        (
-          $REMOTE_KEY IS NULL OR
-          $REMOTE_IV IS NULL OR
-          $REMOTE_DIGEST IS NULL
-        )
-        AND
-        (
-          $TRANSFER_STATE = $TRANSFER_PROGRESS_DONE AND
-          $DATA_FILE NOT NULL
-        )
+        $TRANSFER_STATE = $TRANSFER_PROGRESS_DONE AND
+        $DATA_FILE NOT NULL
        """
      )
      .run()
      .readToList { AttachmentId(it.requireLong(ID)) }
  }
 
-  /**
-   * There was a temporary bug where we were saving the wrong size for attachments. This function can be used to correct that.
-   */
-  fun updateSize(attachmentId: AttachmentId, size: Long) {
-    writableDatabase
-      .update(TABLE_NAME)
-      .values(DATA_SIZE to size)
-      .where("$ID = ?", attachmentId.id)
-      .run()
-  }
-
  /**
   * As part of the digest backfill process, this updates the (key, IV, digest) tuple for an attachment.
   */

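With the constants substituted, the simplified query above comes out to roughly the following SQL (a sketch; the literal table/column names and the value of TRANSFER_PROGRESS_DONE are assumptions inferred from the interpolated constants):

    SELECT _id
    FROM attachment
    WHERE transfer_state = 0 AND data_file NOT NULL
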
BackfillDigestJob.kt

@@ -6,9 +6,8 @@
 package org.thoughtcrime.securesms.jobs
 
 import org.signal.core.util.Base64
-import org.signal.core.util.StreamUtil
+import org.signal.core.util.copyTo
 import org.signal.core.util.logging.Log
-import org.signal.core.util.readLength
 import org.signal.core.util.stream.NullOutputStream
 import org.signal.core.util.withinTransaction
 import org.thoughtcrime.securesms.attachments.AttachmentId
@@ -21,8 +20,8 @@ import org.whispersystems.signalservice.internal.crypto.PaddingInputStream
 import java.io.IOException
 
 /**
- * For attachments that were created before we saved IV's, this will generate an IV and update the corresponding digest.
- * This is important for backupsV2, where we need to know an attachments digest in advance.
+ * This goes through all attachments with pre-existing data and recalculates their digests.
+ * This is important for backupsV2, where we need to know an attachment's digest in advance.
  *
  * This job needs to be careful to (1) minimize time in the transaction, and (2) never write partial results to disk, i.e. only write the full (key/iv/digest)
  * tuple together all at once (partial writes could poison the db, preventing us from retrying properly in the event of a crash or transient error).
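Concretely, the "full tuple" rule means the backfill results land in a single UPDATE rather than three. A minimal sketch of such a writer, reusing the query-builder style visible in AttachmentTable above (the method itself is not shown in this diff, so the name and Base64 helper here are illustrative):

    // Persist key, IV, and digest together: a crash can never leave a row
    // holding a fresh key alongside a stale digest.
    fun setKeyIvDigest(attachmentId: AttachmentId, key: ByteArray, iv: ByteArray, digest: ByteArray) {
      writableDatabase
        .update(TABLE_NAME)
        .values(
          REMOTE_KEY to Base64.encodeWithPadding(key),
          REMOTE_IV to iv,
          REMOTE_DIGEST to digest
        )
        .where("$ID = ?", attachmentId.id)
        .run()
    }
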
@@ -43,7 +42,6 @@ class BackfillDigestJob private constructor(
       .setQueue("BackfillDigestJob")
       .setMaxAttempts(3)
       .setLifespan(Parameters.IMMORTAL)
-      .setPriority(Parameters.PRIORITY_LOW)
       .build()
   )
@@ -66,31 +64,6 @@ class BackfillDigestJob private constructor(
        return Result.success()
      }
 
-      if (attachment.remoteKey != null && attachment.remoteIv != null && attachment.remoteDigest != null) {
-        Log.w(TAG, "$attachmentId already has all required components! Skipping.")
-        return Result.success()
-      }
-
-      // There was a bug where we were accidentally saving the padded size for the attachment as the actual size. This corrects that.
-      // However, we're in a transaction, and reading a file is expensive in general, so we only do this if the length is unset or set to the padded size.
-      // Given that the padding algorithm targets padding <= 5%, and most attachments are a couple hundred kb, this should greatly limit the false positive rate
-      // to something like 1 in 10,000ish.
-      val fileLength = if (attachment.size == 0L || attachment.size == PaddingInputStream.getPaddedSize(attachment.size)) {
-        try {
-          SignalDatabase.attachments.getAttachmentStream(attachmentId, offset = 0).use { it.readLength() }
-        } catch (e: IOException) {
-          Log.w(TAG, "Could not open a stream for $attachmentId while calculating the length. Assuming that the file no longer exists. Skipping.", e)
-          return Result.success()
-        }
-      } else {
-        attachment.size
-      }
-
-      if (fileLength != attachment.size) {
-        Log.w(TAG, "$attachmentId had a saved size of ${attachment.size} but the actual size is $fileLength. Will update.")
-        SignalDatabase.attachments.updateSize(attachmentId, fileLength)
-      }
-
      val stream = try {
        SignalDatabase.attachments.getAttachmentStream(attachmentId, offset = 0)
      } catch (e: IOException) {
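A quick sanity check on the 1-in-10,000 estimate in the removed comment: the padding rounds each size up to the next bucket in a geometric series, so a stored size only collides with its own padded size when it already sits exactly on a bucket boundary. A sketch of that bucketing (assuming a 1.05-series algorithm with a 541-byte floor; the real math lives in PaddingInputStream.getPaddedSize):

    import kotlin.math.ceil
    import kotlin.math.ln
    import kotlin.math.pow

    // Round a plaintext size up to the next 1.05^n bucket (541-byte floor assumed).
    fun paddedSize(size: Long): Long =
      maxOf(541.0, 1.05.pow(ceil(ln(size.toDouble().coerceAtLeast(1.0)) / ln(1.05)))).toLong()

    // Near a few hundred KB, adjacent buckets sit ~5% (tens of KB) apart, so very
    // few of the possible sizes in that range land exactly on a boundary.
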
@@ -99,14 +72,14 @@
      }
 
      // In order to match the exact digest calculation, we need to use the same padding that we would use when uploading the attachment.
-      Triple(attachment.remoteKey?.let { Base64.decode(it) }, attachment.remoteIv, PaddingInputStream(stream, fileLength))
+      Triple(attachment.remoteKey?.let { Base64.decode(it) }, attachment.remoteIv, PaddingInputStream(stream, attachment.size))
    }
 
    val key = originalKey ?: Util.getSecretBytes(64)
    val iv = originalIv ?: Util.getSecretBytes(16)
 
    val cipherOutputStream = AttachmentCipherOutputStream(key, iv, NullOutputStream)
-    StreamUtil.copy(decryptingStream, cipherOutputStream)
+    decryptingStream.copyTo(cipherOutputStream)
 
    val digest = cipherOutputStream.transmittedDigest

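NullOutputStream above is what lets the job recompute a digest without writing ciphertext anywhere: the cipher stream does its digest bookkeeping while the encrypted bytes are thrown away. Conceptually it is no more than this (a sketch, not the actual org.signal.core.util.stream implementation):

    import java.io.OutputStream

    // A sink that discards everything written to it; only the side effect of the
    // wrapping AttachmentCipherOutputStream (the transmitted digest) survives.
    object NullOutputStream : OutputStream() {
      override fun write(b: Int) = Unit
      override fun write(b: ByteArray, off: Int, len: Int) = Unit
    }
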
ApplicationMigrations.java

@@ -12,7 +12,6 @@ import org.greenrobot.eventbus.Subscribe;
 import org.greenrobot.eventbus.ThreadMode;
 import org.signal.core.util.logging.Log;
 import org.thoughtcrime.securesms.jobmanager.JobManager;
-import org.thoughtcrime.securesms.jobmanager.migrations.RetrieveProfileJobMigration;
 import org.thoughtcrime.securesms.keyvalue.SignalStore;
 import org.thoughtcrime.securesms.stickers.BlessedPacks;
 import org.thoughtcrime.securesms.util.TextSecurePreferences;
@@ -154,10 +153,11 @@ public class ApplicationMigrations {
    static final int EXPIRE_TIMER_CAPABILITY = 109;
    static final int REBUILD_MESSAGE_FTS_INDEX_6 = 110;
    static final int EXPIRE_TIMER_CAPABILITY_2 = 111;
-    static final int BACKFILL_DIGESTS = 112;
+    // static final int BACKFILL_DIGESTS = 112;
+    static final int BACKFILL_DIGESTS_V2 = 113;
  }
 
-  public static final int CURRENT_VERSION = 112;
+  public static final int CURRENT_VERSION = 113;
 
  /**
   * This *must* be called after the {@link JobManager} has been instantiated, but *before* the call
@@ -704,8 +704,8 @@
      jobs.put(Version.EXPIRE_TIMER_CAPABILITY_2, new AttributesMigrationJob());
    }
 
-    if (lastSeenVersion < Version.BACKFILL_DIGESTS) {
-      jobs.put(Version.BACKFILL_DIGESTS, new BackfillDigestsMigrationJob());
+    if (lastSeenVersion < Version.BACKFILL_DIGESTS_V2) {
+      jobs.put(Version.BACKFILL_DIGESTS_V2, new BackfillDigestsMigrationJob());
    }
 
    return jobs;

BackfillDigestsMigrationJob.kt

@@ -27,6 +27,8 @@ internal class BackfillDigestsMigrationJob(
      .map { BackfillDigestJob(it) }
 
    AppDependencies.jobManager.addAll(jobs)
+    Log.i(TAG, "Enqueued ${jobs.size} backfill digest jobs.")
  }
 
  override fun shouldRetry(e: Exception): Boolean = false
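
Pieced together from the visible lines, the method this last hunk extends boils down to the following (a sketch; performMigration as the method name is an assumption based on the migration-job pattern):

    override fun performMigration() {
      // One BackfillDigestJob per attachment that still has data on disk.
      val jobs = SignalDatabase.attachments.getAttachmentsThatNeedNewDigests()
        .map { BackfillDigestJob(it) }

      AppDependencies.jobManager.addAll(jobs)
      Log.i(TAG, "Enqueued ${jobs.size} backfill digest jobs.")
    }

Returning false from shouldRetry is safe here because each enqueued BackfillDigestJob carries its own retry policy (maxAttempts = 3 with an immortal lifespan, per the parameters shown above).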