Ensure archive data is copied when deduping.

This commit is contained in:
Greyson Parrelli 2024-05-20 10:28:17 -04:00 committed by Cody Henthorne
parent 0fb1514da2
commit 5ad38c7960
2 changed files with 76 additions and 16 deletions

View file

@ -16,6 +16,7 @@ import org.signal.core.util.update
import org.thoughtcrime.securesms.attachments.AttachmentId
import org.thoughtcrime.securesms.attachments.Cdn
import org.thoughtcrime.securesms.attachments.PointerAttachment
import org.thoughtcrime.securesms.backup.v2.BackupRepository.getMediaName
import org.thoughtcrime.securesms.database.AttachmentTable.TransformProperties
import org.thoughtcrime.securesms.keyvalue.SignalStore
import org.thoughtcrime.securesms.mms.MediaStream
@ -25,6 +26,8 @@ import org.thoughtcrime.securesms.mms.SentMediaQuality
import org.thoughtcrime.securesms.providers.BlobProvider
import org.thoughtcrime.securesms.recipients.Recipient
import org.thoughtcrime.securesms.util.MediaUtil
import org.thoughtcrime.securesms.util.Util
import org.whispersystems.signalservice.api.backup.MediaId
import org.whispersystems.signalservice.api.push.ServiceId
import java.io.File
import java.util.UUID
@ -195,6 +198,8 @@ class AttachmentTableTest_deduping {
assertDataHashEndMatches(id1, id2)
assertSkipTransform(id1, true)
assertSkipTransform(id2, true)
assertRemoteFieldsMatch(id1, id2)
assertArchiveFieldsMatch(id1, id2)
}
// Mimics sending two files at once. Ensures all fields are kept in sync as we compress and upload.
@ -220,6 +225,7 @@ class AttachmentTableTest_deduping {
assertDataHashStartMatches(id1, id2)
assertDataHashEndMatches(id1, id2)
assertRemoteFieldsMatch(id1, id2)
assertArchiveFieldsMatch(id1, id2)
}
// Re-use the upload when uploaded recently
@ -234,6 +240,7 @@ class AttachmentTableTest_deduping {
assertDataHashStartMatches(id1, id2)
assertDataHashEndMatches(id1, id2)
assertRemoteFieldsMatch(id1, id2)
assertArchiveFieldsMatch(id1, id2)
assertSkipTransform(id1, true)
assertSkipTransform(id2, true)
}
@ -253,6 +260,7 @@ class AttachmentTableTest_deduping {
assertSkipTransform(id2, true)
assertDoesNotHaveRemoteFields(id2)
assertArchiveFieldsMatch(id1, id2)
}
// This isn't so much "desirable behavior" as it is documenting how things work.
@ -282,6 +290,7 @@ class AttachmentTableTest_deduping {
assertSkipTransform(id1, true)
assertSkipTransform(id1, true)
assertRemoteFieldsMatch(id1, id2)
assertArchiveFieldsMatch(id1, id2)
}
// This represents what would happen if you edited a video, sent it, then forwarded it. We should match, skip transform, and skip upload.
@ -297,6 +306,7 @@ class AttachmentTableTest_deduping {
assertSkipTransform(id1, true)
assertSkipTransform(id1, true)
assertRemoteFieldsMatch(id1, id2)
assertArchiveFieldsMatch(id1, id2)
}
// This represents what would happen if you edited a video, sent it, then forwarded it, but *edited the forwarded video*. We should not dedupe.
@ -327,6 +337,7 @@ class AttachmentTableTest_deduping {
assertSkipTransform(id1, true)
assertSkipTransform(id1, true)
assertRemoteFieldsMatch(id1, id2)
assertArchiveFieldsMatch(id1, id2)
}
// This represents what would happen if you sent an image using high quality, then forwarded it using standard quality.
@ -343,6 +354,7 @@ class AttachmentTableTest_deduping {
assertSkipTransform(id1, true)
assertSkipTransform(id1, true)
assertRemoteFieldsMatch(id1, id2)
assertArchiveFieldsMatch(id1, id2)
}
// Make sure that files marked as unhashable are all updated together
@ -457,6 +469,7 @@ class AttachmentTableTest_deduping {
assertDataHashStartMatches(id1, id2)
assertDataHashEndMatches(id1, id2)
assertRemoteFieldsMatch(id1, id2)
assertArchiveFieldsMatch(id1, id2)
}
// Making sure things work for quotes of videos, which have trickier transform properties
@ -470,6 +483,7 @@ class AttachmentTableTest_deduping {
assertDataFilesAreTheSame(id1, id2)
assertDataHashEndMatches(id1, id2)
assertRemoteFieldsMatch(id1, id2)
assertArchiveFieldsMatch(id1, id2)
}
}
@ -648,6 +662,15 @@ class AttachmentTableTest_deduping {
/**
 * Test helper that finalizes [attachmentId] as uploaded at [uploadTimestamp] and then records archive data for it.
 *
 * The archive entry uses [Cdn.CDN_3], the attachment's own media name, and two independently generated
 * media ids (one for the thumbnail, one for the media itself), each built from 15 secret bytes.
 */
fun upload(attachmentId: AttachmentId, uploadTimestamp: Long = System.currentTimeMillis()) {
  val pointer = createPointerAttachment(attachmentId, uploadTimestamp)
  SignalDatabase.attachments.finalizeAttachmentAfterUpload(attachmentId, pointer, uploadTimestamp)

  // Re-read the row after finalizing; the media name is derived from this reloaded attachment.
  val uploaded = SignalDatabase.attachments.getAttachment(attachmentId)!!
  val mediaName = uploaded.getMediaName().name

  // Generated in the same order as before: thumbnail id first, then the media id.
  val thumbnailMediaId = MediaId(Util.getSecretBytes(15)).encode()
  val mediaId = MediaId(Util.getSecretBytes(15)).encode()

  SignalDatabase.attachments.setArchiveData(
    attachmentId = attachmentId,
    archiveCdn = Cdn.CDN_3.cdnNumber,
    archiveMediaName = mediaName,
    archiveThumbnailMediaId = thumbnailMediaId,
    archiveMediaId = mediaId
  )
}
fun delete(attachmentId: AttachmentId) {
@ -746,6 +769,15 @@ class AttachmentTableTest_deduping {
assertEquals(lhsAttachment.cdn.cdnNumber, rhsAttachment.cdn.cdnNumber)
}
/**
 * Asserts that the attachments identified by [lhs] and [rhs] have equal archive CDN, archive media name,
 * and archive media id. The archive thumbnail media id is not compared here.
 */
fun assertArchiveFieldsMatch(lhs: AttachmentId, rhs: AttachmentId) {
  val expected = SignalDatabase.attachments.getAttachment(lhs)!!
  val actual = SignalDatabase.attachments.getAttachment(rhs)!!

  assertEquals(expected.archiveCdn, actual.archiveCdn)
  assertEquals(expected.archiveMediaName, actual.archiveMediaName)
  assertEquals(expected.archiveMediaId, actual.archiveMediaId)
}
fun assertDoesNotHaveRemoteFields(attachmentId: AttachmentId) {
val databaseAttachment = SignalDatabase.attachments.getAttachment(attachmentId)!!
assertEquals(0, databaseAttachment.uploadTimestamp)

View file

@ -777,7 +777,7 @@ class AttachmentTable(
// We don't look at hash_start here because that could result in us matching on a file that got compressed down to something smaller, effectively lowering
// the quality of the attachment we received.
val hashMatch: DataFileInfo? = readableDatabase
.select(ID, DATA_FILE, DATA_SIZE, DATA_RANDOM, DATA_HASH_START, DATA_HASH_END, TRANSFORM_PROPERTIES, UPLOAD_TIMESTAMP)
.select(ID, DATA_FILE, DATA_SIZE, DATA_RANDOM, DATA_HASH_START, DATA_HASH_END, TRANSFORM_PROPERTIES, UPLOAD_TIMESTAMP, ARCHIVE_CDN, ARCHIVE_MEDIA_NAME, ARCHIVE_MEDIA_ID)
.from(TABLE_NAME)
.where("$DATA_HASH_END = ? AND $DATA_HASH_END NOT NULL AND $TRANSFER_STATE = $TRANSFER_PROGRESS_DONE AND $DATA_FILE NOT NULL", fileWriteResult.hash)
.run()
@ -793,6 +793,9 @@ class AttachmentTable(
values.put(DATA_RANDOM, hashMatch.random)
values.put(DATA_HASH_START, hashMatch.hashEnd)
values.put(DATA_HASH_END, hashMatch.hashEnd)
values.put(ARCHIVE_CDN, hashMatch.archiveCdn)
values.put(ARCHIVE_MEDIA_NAME, hashMatch.archiveMediaName)
values.put(ARCHIVE_MEDIA_ID, hashMatch.archiveMediaId)
} else {
values.put(DATA_FILE, fileWriteResult.file.absolutePath)
values.put(DATA_SIZE, fileWriteResult.length)
@ -1214,7 +1217,7 @@ class AttachmentTable(
fun getDataFileInfo(attachmentId: AttachmentId): DataFileInfo? {
return readableDatabase
.select(ID, DATA_FILE, DATA_SIZE, DATA_RANDOM, DATA_HASH_START, DATA_HASH_END, TRANSFORM_PROPERTIES, UPLOAD_TIMESTAMP)
.select(ID, DATA_FILE, DATA_SIZE, DATA_RANDOM, DATA_HASH_START, DATA_HASH_END, TRANSFORM_PROPERTIES, UPLOAD_TIMESTAMP, ARCHIVE_CDN, ARCHIVE_MEDIA_NAME, ARCHIVE_MEDIA_ID)
.from(TABLE_NAME)
.where("$ID = ?", attachmentId.id)
.run()
@ -1432,18 +1435,34 @@ class AttachmentTable(
return readableDatabase.rawQuery(query, null)
}
/**
 * Sets the archive data for the specific attachment, as well as for any attachments that use the same underlying file.
 *
 * Writes the archive CDN, archive media id, archive media name, and archive thumbnail media id, and sets the
 * archive transfer state to [ArchiveTransferState.FINISHED].
 *
 * NOTE(review): this listing looks like a rendered diff without +/- markers, so the body below may interleave the
 * pre-change statement (update by ID) with the post-change transaction (update by DATA_FILE). Confirm against the
 * real file before relying on the exact statement sequence.
 */
fun setArchiveData(attachmentId: AttachmentId, archiveCdn: Int, archiveMediaName: String, archiveMediaId: String, archiveThumbnailMediaId: String) {
// Updates only the row whose ID matches the given attachment.
writableDatabase
.update(TABLE_NAME)
.values(
ARCHIVE_CDN to archiveCdn,
ARCHIVE_MEDIA_ID to archiveMediaId,
ARCHIVE_MEDIA_NAME to archiveMediaName,
ARCHIVE_THUMBNAIL_MEDIA_ID to archiveThumbnailMediaId,
ARCHIVE_TRANSFER_STATE to ArchiveTransferState.FINISHED.value
)
.where("$ID = ?", attachmentId.id)
.run()
// Inside withinTransaction: look up the attachment's data file, then update every row that points at the same file.
writableDatabase.withinTransaction { db ->
val dataFile = db
.select(DATA_FILE)
.from(TABLE_NAME)
.where("$ID = ?", attachmentId.id)
.run()
.readToSingleObject { it.requireString(DATA_FILE) }
// With no data file there is nothing to match other rows on: log a warning and exit the lambda without updating.
if (dataFile == null) {
Log.w(TAG, "No data file found for attachment $attachmentId. Can't set archive data.")
return@withinTransaction
}
// Apply the same archive values to all rows whose DATA_FILE equals the one found above.
db.update(TABLE_NAME)
.values(
ARCHIVE_CDN to archiveCdn,
ARCHIVE_MEDIA_ID to archiveMediaId,
ARCHIVE_MEDIA_NAME to archiveMediaName,
ARCHIVE_THUMBNAIL_MEDIA_ID to archiveThumbnailMediaId,
ARCHIVE_TRANSFER_STATE to ArchiveTransferState.FINISHED.value
)
.where("$DATA_FILE = ?", dataFile)
.run()
}
}
fun updateArchiveCdnByMediaId(archiveMediaId: String, archiveCdn: Int): Int {
@ -1796,7 +1815,7 @@ class AttachmentTable(
// First we'll check if our file hash matches the starting or ending hash of any other attachments and has compatible transform properties.
// We'll prefer the match with the most recent upload timestamp.
val hashMatch: DataFileInfo? = readableDatabase
.select(ID, DATA_FILE, DATA_SIZE, DATA_RANDOM, DATA_HASH_START, DATA_HASH_END, TRANSFORM_PROPERTIES, UPLOAD_TIMESTAMP)
.select(ID, DATA_FILE, DATA_SIZE, DATA_RANDOM, DATA_HASH_START, DATA_HASH_END, TRANSFORM_PROPERTIES, UPLOAD_TIMESTAMP, ARCHIVE_CDN, ARCHIVE_MEDIA_NAME, ARCHIVE_MEDIA_ID)
.from(TABLE_NAME)
.where("$DATA_FILE NOT NULL AND ($DATA_HASH_START = ? OR $DATA_HASH_END = ?)", fileWriteResult.hash, fileWriteResult.hash)
.run()
@ -1826,6 +1845,9 @@ class AttachmentTable(
contentValues.put(DATA_RANDOM, hashMatch.random)
contentValues.put(DATA_HASH_START, fileWriteResult.hash)
contentValues.put(DATA_HASH_END, hashMatch.hashEnd)
contentValues.put(ARCHIVE_CDN, hashMatch.archiveCdn)
contentValues.put(ARCHIVE_MEDIA_NAME, hashMatch.archiveMediaName)
contentValues.put(ARCHIVE_MEDIA_ID, hashMatch.archiveMediaId)
if (hashMatch.transformProperties.skipTransform) {
Log.i(TAG, "[insertAttachmentWithData] The hash match has a DATA_HASH_END and skipTransform=true, so skipping transform of the new file as well. (MessageId: $messageId, ${attachment.uri})")
@ -1987,7 +2009,10 @@ class AttachmentTable(
hashStart = this.requireString(DATA_HASH_START),
hashEnd = this.requireString(DATA_HASH_END),
transformProperties = TransformProperties.parse(this.requireString(TRANSFORM_PROPERTIES)),
uploadTimestamp = this.requireLong(UPLOAD_TIMESTAMP)
uploadTimestamp = this.requireLong(UPLOAD_TIMESTAMP),
archiveCdn = this.requireInt(ARCHIVE_CDN),
archiveMediaName = this.requireString(ARCHIVE_MEDIA_NAME),
archiveMediaId = this.requireString(ARCHIVE_MEDIA_ID)
)
}
@ -2050,7 +2075,10 @@ class AttachmentTable(
val hashStart: String?,
val hashEnd: String?,
val transformProperties: TransformProperties,
val uploadTimestamp: Long
val uploadTimestamp: Long,
val archiveCdn: Int,
val archiveMediaName: String?,
val archiveMediaId: String?
)
@VisibleForTesting