Build a simple ANR detector.

This commit is contained in:
Greyson Parrelli 2023-10-31 13:53:32 -07:00
parent 776a4c5dce
commit 5a005fb809
7 changed files with 308 additions and 2 deletions

View file

@ -30,6 +30,7 @@ import org.conscrypt.Conscrypt;
import org.greenrobot.eventbus.EventBus;
import org.signal.aesgcmprovider.AesGcmProvider;
import org.signal.core.util.MemoryTracker;
import org.signal.core.util.concurrent.AnrDetector;
import org.signal.core.util.concurrent.SignalExecutors;
import org.signal.core.util.logging.AndroidLogger;
import org.signal.core.util.logging.Log;
@ -109,6 +110,7 @@ import io.reactivex.rxjava3.exceptions.OnErrorNotImplementedException;
import io.reactivex.rxjava3.exceptions.UndeliverableException;
import io.reactivex.rxjava3.plugins.RxJavaPlugins;
import io.reactivex.rxjava3.schedulers.Schedulers;
import kotlin.Unit;
import rxdogtag2.RxDogTag;
/**
@ -151,6 +153,7 @@ public class ApplicationContext extends MultiDexApplication implements AppForegr
initializeLogging();
Log.i(TAG, "onCreate()");
})
.addBlocking("anr-detector", this::startAnrDetector)
.addBlocking("security-provider", this::initializeSecurityProvider)
.addBlocking("crash-handling", this::initializeCrashHandling)
.addBlocking("rx-init", this::initializeRx)
@ -227,6 +230,7 @@ public class ApplicationContext extends MultiDexApplication implements AppForegr
SubscriptionKeepAliveJob.enqueueAndTrackTimeIfNecessary();
ExternalLaunchDonationJob.enqueueIfNecessary();
FcmFetchManager.onForeground(this);
startAnrDetector();
SignalExecutors.BOUNDED.execute(() -> {
FeatureFlags.refreshIfNecessary();
@ -260,6 +264,7 @@ public class ApplicationContext extends MultiDexApplication implements AppForegr
ApplicationDependencies.getShakeToReport().disable();
ApplicationDependencies.getDeadlockDetector().stop();
MemoryTracker.stop();
AnrDetector.stop();
}
public void checkBuildExpiration() {
@ -269,6 +274,17 @@ public class ApplicationContext extends MultiDexApplication implements AppForegr
}
}
/**
* Note: this is purposefully "started" twice -- once during application create, and once during foreground.
* This is so we can capture ANR's that happen on boot before the foreground event.
*/
private void startAnrDetector() {
AnrDetector.start(TimeUnit.SECONDS.toMillis(5), FeatureFlags::internalUser, (dumps) -> {
LogDatabase.getInstance(this).anrs().save(System.currentTimeMillis(), dumps);
return Unit.INSTANCE;
});
}
private void initializeSecurityProvider() {
int aesPosition = Security.insertProviderAt(new AesGcmProvider(), 1);
Log.i(TAG, "Installed AesGcmProvider: " + aesPosition);

View file

@ -171,6 +171,17 @@ class InternalSettingsFragment : DSLSettingsFragment(R.string.preferences__inter
}
)
clickPref(
title = DSLSettingsText.from("Clear all logs"),
onClick = {
SimpleTask.run({
LogDatabase.getInstance(requireActivity().application).logs.clearAll()
}) {
Toast.makeText(requireContext(), "Cleared all logs", Toast.LENGTH_SHORT).show()
}
}
)
clickPref(
title = DSLSettingsText.from("Clear keep longer logs"),
onClick = {
@ -178,6 +189,28 @@ class InternalSettingsFragment : DSLSettingsFragment(R.string.preferences__inter
}
)
clickPref(
title = DSLSettingsText.from("Clear all crashes"),
onClick = {
SimpleTask.run({
LogDatabase.getInstance(requireActivity().application).crashes.clear()
}) {
Toast.makeText(requireContext(), "Cleared crashes", Toast.LENGTH_SHORT).show()
}
}
)
clickPref(
title = DSLSettingsText.from("Clear all ANRs"),
onClick = {
SimpleTask.run({
LogDatabase.getInstance(requireActivity().application).anrs.clear()
}) {
Toast.makeText(requireContext(), "Cleared ANRs", Toast.LENGTH_SHORT).show()
}
}
)
clickPref(
title = DSLSettingsText.from("Log dump PreKey ServiceId-KeyIds"),
onClick = {

View file

@ -16,6 +16,7 @@ import org.signal.core.util.logging.Log
import org.signal.core.util.mebiBytes
import org.signal.core.util.readToList
import org.signal.core.util.readToSingleInt
import org.signal.core.util.requireLong
import org.signal.core.util.requireNonNullString
import org.signal.core.util.select
import org.signal.core.util.update
@ -56,7 +57,7 @@ class LogDatabase private constructor(
companion object {
private val TAG = Log.tag(LogDatabase::class.java)
private const val DATABASE_VERSION = 3
private const val DATABASE_VERSION = 4
private const val DATABASE_NAME = "signal-logs.db"
@SuppressLint("StaticFieldLeak") // We hold an Application context, not a view context
@ -83,11 +84,15 @@ class LogDatabase private constructor(
@get:JvmName("crashes")
val crashes: CrashTable by lazy { CrashTable(this) }
@get:JvmName("anrs")
val anrs: AnrTable by lazy { AnrTable(this) }
override fun onCreate(db: SQLiteDatabase) {
Log.i(TAG, "onCreate()")
db.execSQL(LogTable.CREATE_TABLE)
db.execSQL(CrashTable.CREATE_TABLE)
db.execSQL(AnrTable.CREATE_TABLE)
LogTable.CREATE_INDEXES.forEach { db.execSQL(it) }
CrashTable.CREATE_INDEXES.forEach { db.execSQL(it) }
@ -108,6 +113,10 @@ class LogDatabase private constructor(
db.execSQL("CREATE INDEX crash_created_at ON crash (created_at)")
db.execSQL("CREATE INDEX crash_name_message ON crash (name, message)")
}
if (oldVersion < 4) {
db.execSQL("CREATE TABLE anr (_id INTEGER PRIMARY KEY, created_at INTEGER NOT NULL, thread_dump TEXT NOT NULL)")
}
}
override fun onOpen(db: SQLiteDatabase) {
@ -258,6 +267,12 @@ class LogDatabase private constructor(
.run()
}
fun clearAll() {
writableDatabase
.delete(TABLE_NAME)
.run()
}
private fun getSize(query: String?, args: Array<String>?): Long {
readableDatabase.query(TABLE_NAME, arrayOf("SUM($SIZE)"), query, args, null, null, null).use { cursor ->
return if (cursor.moveToFirst()) {
@ -387,6 +402,12 @@ class LogDatabase private constructor(
.run()
}
fun clear() {
writableDatabase
.delete(TABLE_NAME)
.run()
}
private fun CrashConfig.CrashPattern.asLikeQuery(): Pair<String, Array<String>> {
val query = StringBuilder()
var args = arrayOf<String>()
@ -415,4 +436,72 @@ class LogDatabase private constructor(
return query.toString() to args
}
}
class AnrTable(private val openHelper: LogDatabase) {
companion object {
const val TABLE_NAME = "anr"
const val ID = "_id"
const val CREATED_AT = "created_at"
const val THREAD_DUMP = "thread_dump"
const val CREATE_TABLE = """
CREATE TABLE $TABLE_NAME (
$ID INTEGER PRIMARY KEY,
$CREATED_AT INTEGER NOT NULL,
$THREAD_DUMP TEXT NOT NULL
)
"""
}
private val readableDatabase: SQLiteDatabase get() = openHelper.readableDatabase
private val writableDatabase: SQLiteDatabase get() = openHelper.writableDatabase
fun save(currentTime: Long, threadDumps: String) {
writableDatabase
.insertInto(TABLE_NAME)
.values(
CREATED_AT to currentTime,
THREAD_DUMP to threadDumps
)
.run()
val count = writableDatabase
.delete(TABLE_NAME)
.where(
"""
$ID NOT IN (SELECT $ID FROM $TABLE_NAME ORDER BY $CREATED_AT DESC LIMIT 10)
""".trimIndent()
)
.run()
if (count > 0) {
Log.i(TAG, "Deleted $count old ANRs")
}
}
fun getAll(): List<AnrRecord> {
return readableDatabase
.select()
.from(TABLE_NAME)
.run()
.readToList { cursor ->
AnrRecord(
createdAt = cursor.requireLong(CREATED_AT),
threadDump = cursor.requireNonNullString(THREAD_DUMP)
)
}
.sortedBy { it.createdAt }
}
fun clear() {
writableDatabase
.delete(TABLE_NAME)
.run()
}
data class AnrRecord(
val createdAt: Long,
val threadDump: String
)
}
}

View file

@ -0,0 +1,35 @@
/*
* Copyright 2023 Signal Messenger, LLC
* SPDX-License-Identifier: AGPL-3.0-only
*/
package org.thoughtcrime.securesms.logsubmit
import android.content.Context
import org.thoughtcrime.securesms.database.LogDatabase
import org.thoughtcrime.securesms.dependencies.ApplicationDependencies
import java.text.SimpleDateFormat
import java.util.Date
import java.util.Locale
class LogSectionAnr : LogSection {
companion object {
private val dateFormat = SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS zzz", Locale.US)
}
override fun getTitle(): String = "ANR"
override fun getContent(context: Context): CharSequence {
val anrs = LogDatabase.getInstance(ApplicationDependencies.getApplication()).anrs.getAll()
return if (anrs.isEmpty()) {
"None"
} else {
"\n" + anrs.joinToString(separator = "\n\n") {
val date = dateFormat.format(Date(it.createdAt))
"------------- $date -------------\n${it.threadDump}"
}
}
}
}

View file

@ -97,6 +97,7 @@ public class SubmitDebugLogRepository {
add(new LogSectionSenderKey());
}
add(new LogSectionRemappedRecords());
add(new LogSectionAnr());
add(new LogSectionLogcat());
add(new LogSectionLoggerHeader());
}};

View file

@ -303,7 +303,7 @@ class DeleteBuilderPart1(
}
fun run(): Int {
return db.delete(tableName, null, null)
return db.delete(tableName, null, emptyArray())
}
}

View file

@ -0,0 +1,132 @@
/*
* Copyright 2023 Signal Messenger, LLC
* SPDX-License-Identifier: AGPL-3.0-only
*/
package org.signal.core.util.concurrent
import android.os.Debug
import android.os.Looper
import androidx.annotation.MainThread
import org.signal.core.util.ThreadUtil
import org.signal.core.util.logging.Log
import java.lang.IllegalStateException
import java.lang.RuntimeException
import java.text.SimpleDateFormat
import java.util.Date
import java.util.Locale
import kotlin.time.Duration
import kotlin.time.Duration.Companion.milliseconds
import kotlin.time.Duration.Companion.seconds
/**
* Attempts to detect ANR's by posting runnables to the main thread and detecting if they've been run within the [anrThreshold].
* If an ANR is detected, it is logged, and the [anrSaver] is called with the series of thread dumps that were taken of the main thread.
*
* The detection of an ANR will cause an internal user to crash.
*/
object AnrDetector {
private val TAG = Log.tag(AnrDetector::class.java)
private var thread: AnrDetectorThread? = null
private val dateFormat = SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS zzz", Locale.US)
@JvmStatic
@MainThread
fun start(anrThreshold: Long = 5.seconds.inWholeMilliseconds, isInternal: () -> Boolean, anrSaver: (String) -> Unit) {
thread?.end()
thread = null
thread = AnrDetectorThread(anrThreshold.milliseconds, isInternal, anrSaver)
thread!!.start()
}
@JvmStatic
@MainThread
fun stop() {
thread?.end()
thread = null
}
private class AnrDetectorThread(
private val anrThreshold: Duration,
private val isInternal: () -> Boolean,
private val anrSaver: (String) -> Unit
) : Thread("signal-anr") {
@Volatile
private var uiRan = false
private val uiRunnable = Runnable {
uiRan = true
}
@Volatile
private var stopped = false
override fun run() {
while (!stopped) {
uiRan = false
ThreadUtil.postToMain(uiRunnable)
val intervalCount = 5
val intervalDuration = anrThreshold.inWholeMilliseconds / intervalCount
if (intervalDuration == 0L) {
throw IllegalStateException("ANR threshold is too small!")
}
val dumps = mutableListOf<String>()
for (i in 1..intervalCount) {
if (stopped) {
Log.i(TAG, "Thread shutting down during intervals.")
return
}
ThreadUtil.sleep(intervalDuration)
if (!uiRan) {
dumps += getMainThreadDump()
} else {
dumps.clear()
}
}
if (!uiRan && !Debug.isDebuggerConnected() && !Debug.waitingForDebugger()) {
Log.w(TAG, "Failed to post to main in ${anrThreshold.inWholeMilliseconds} ms! Likely ANR!")
val dumpString = dumps.joinToString(separator = "\n\n")
Log.w(TAG, "Main thread dumps:\n$dumpString")
ThreadUtil.cancelRunnableOnMain(uiRunnable)
anrSaver(dumpString)
if (isInternal()) {
Log.e(TAG, "Internal user -- crashing!")
throw SignalAnrException()
}
}
dumps.clear()
}
Log.i(TAG, "Thread shutting down.")
}
fun end() {
stopped = true
}
private fun getMainThreadDump(): String {
val dump: Map<Thread, Array<StackTraceElement>> = Thread.getAllStackTraces()
val mainThread = Looper.getMainLooper().thread
val date = dateFormat.format(Date())
val dumpString = dump[mainThread]?.joinToString(separator = "\n") ?: "Not available."
return "--- $date:\n$dumpString"
}
}
private class SignalAnrException : RuntimeException()
}