From 8baf07a11c5482e3f0c61c83360fbe043c197736 Mon Sep 17 00:00:00 2001 From: Cody Henthorne Date: Fri, 21 Apr 2023 11:40:16 -0400 Subject: [PATCH] Refine link preview domain restrictions. --- .../v2/text/TextStoryPostLinkEntryFragment.kt | 2 +- .../thoughtcrime/securesms/util/LinkUtil.java | 88 ------------------- .../thoughtcrime/securesms/util/LinkUtil.kt | 84 ++++++++++++++++++ .../securesms/util/LinkUtilTest_isLegal.java | 11 ++- .../util/LinkUtilTest_isValidPreviewUrl.kt | 55 ++++++++++++ 5 files changed, 150 insertions(+), 90 deletions(-) delete mode 100644 app/src/main/java/org/thoughtcrime/securesms/util/LinkUtil.java create mode 100644 app/src/main/java/org/thoughtcrime/securesms/util/LinkUtil.kt create mode 100644 app/src/test/java/org/thoughtcrime/securesms/util/LinkUtilTest_isValidPreviewUrl.kt diff --git a/app/src/main/java/org/thoughtcrime/securesms/mediasend/v2/text/TextStoryPostLinkEntryFragment.kt b/app/src/main/java/org/thoughtcrime/securesms/mediasend/v2/text/TextStoryPostLinkEntryFragment.kt index 9f0f928915..525382305f 100644 --- a/app/src/main/java/org/thoughtcrime/securesms/mediasend/v2/text/TextStoryPostLinkEntryFragment.kt +++ b/app/src/main/java/org/thoughtcrime/securesms/mediasend/v2/text/TextStoryPostLinkEntryFragment.kt @@ -63,7 +63,7 @@ class TextStoryPostLinkEntryFragment : KeyboardEntryDialogFragment( if (linkPreviewState != null) { val url = linkPreviewState.linkPreview.map { it.url }.orElseGet { linkPreviewState.activeUrlForError } - if (LinkUtil.isLegalUrl(url, false, true)) { + if (LinkUtil.isValidTextStoryPostPreview(url)) { viewModel.setLinkPreview(url) dismissAllowingStateLoss() } else { diff --git a/app/src/main/java/org/thoughtcrime/securesms/util/LinkUtil.java b/app/src/main/java/org/thoughtcrime/securesms/util/LinkUtil.java deleted file mode 100644 index 786f0d706a..0000000000 --- a/app/src/main/java/org/thoughtcrime/securesms/util/LinkUtil.java +++ /dev/null @@ -1,88 +0,0 @@ -package org.thoughtcrime.securesms.util; - -import android.text.TextUtils; - -import androidx.annotation.NonNull; -import androidx.annotation.Nullable; - -import org.signal.core.util.SetUtil; -import org.thoughtcrime.securesms.stickers.StickerUrl; - -import java.util.Objects; -import java.util.Set; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -import okhttp3.HttpUrl; - -public final class LinkUtil { - - private static final Pattern DOMAIN_PATTERN = Pattern.compile("^(https?://)?([^/]+).*$"); - private static final Pattern ALL_ASCII_PATTERN = Pattern.compile("^[\\x00-\\x7F]*$"); - private static final Pattern ALL_NON_ASCII_PATTERN = Pattern.compile("^[^\\x00-\\x7F]*$"); - private static final Pattern ILLEGAL_CHARACTERS_PATTERN = Pattern.compile("[\u202C\u202D\u202E\u2500-\u25FF]"); - - private static final Set INVALID_TOP_LEVEL_DOMAINS = SetUtil.newHashSet("onion", "i2p"); - - private LinkUtil() {} - - /** - * @return True if URL is valid for link previews. - */ - public static boolean isValidPreviewUrl(@Nullable String linkUrl) { - if (linkUrl == null) { - return false; - } - - if (StickerUrl.isValidShareLink(linkUrl)) { - return true; - } - - HttpUrl url = HttpUrl.parse(linkUrl); - return url != null && - !TextUtils.isEmpty(url.scheme()) && - "https".equals(url.scheme()) && - isLegalUrl(linkUrl, false, false); - } - - /** - * @return True if URL is valid, mostly useful for linkifying. - */ - public static boolean isLegalUrl(@NonNull String url) { - return isLegalUrl(url, true, false); - } - - public static boolean isLegalUrl(@NonNull String url, boolean skipTopLevelDomainValidation, boolean requireTopLevelDomain) { - if (ILLEGAL_CHARACTERS_PATTERN.matcher(url).find()) { - return false; - } - - Matcher matcher = DOMAIN_PATTERN.matcher(url); - - if (matcher.matches()) { - String domain = Objects.requireNonNull(matcher.group(2)); - String cleanedDomain = domain.replaceAll("\\.", ""); - String topLevelDomain = parseTopLevelDomain(domain); - - boolean validCharacters = ALL_ASCII_PATTERN.matcher(cleanedDomain).matches() || - ALL_NON_ASCII_PATTERN.matcher(cleanedDomain).matches(); - - boolean validTopLevelDomain = (skipTopLevelDomainValidation || !INVALID_TOP_LEVEL_DOMAINS.contains(topLevelDomain)) && - (!requireTopLevelDomain || topLevelDomain != null); - - return validCharacters && validTopLevelDomain; - } else { - return false; - } - } - - private static @Nullable String parseTopLevelDomain(@NonNull String domain) { - int periodIndex = domain.lastIndexOf("."); - - if (periodIndex >= 0 && periodIndex < domain.length() - 1) { - return domain.substring(periodIndex + 1); - } else { - return null; - } - } -} diff --git a/app/src/main/java/org/thoughtcrime/securesms/util/LinkUtil.kt b/app/src/main/java/org/thoughtcrime/securesms/util/LinkUtil.kt new file mode 100644 index 0000000000..4d45cfd18d --- /dev/null +++ b/app/src/main/java/org/thoughtcrime/securesms/util/LinkUtil.kt @@ -0,0 +1,84 @@ +package org.thoughtcrime.securesms.util + +import okhttp3.HttpUrl +import org.thoughtcrime.securesms.stickers.StickerUrl +import java.util.Objects +import java.util.regex.Pattern + +/** + * Utilities for validating various links for multiple situations. + */ +object LinkUtil { + private val DOMAIN_PATTERN = Pattern.compile("^(https?://)?([^/]+).*$") + private val ALL_ASCII_PATTERN = Pattern.compile("^[\\x00-\\x7F]*$") + private val ALL_NON_ASCII_PATTERN = Pattern.compile("^[^\\x00-\\x7F]*$") + private val ILLEGAL_CHARACTERS_PATTERN = Pattern.compile("[\u202C\u202D\u202E\u2500-\u25FF]") + + private val INVALID_DOMAINS = listOf("example", "example\\.com", "example\\.net", "example\\.org", "i2p", "invalid", "localhost", "onion", "test") + private val INVALID_DOMAINS_REGEX: Regex = Regex("^(.+\\.)?(${INVALID_DOMAINS.joinToString("|")})\\.?\$") + + /** + * Link previews must have all valid URL characters, an allowed domain if present, and must include https:// + */ + @JvmStatic + fun isValidPreviewUrl(linkUrl: String?): Boolean { + if (linkUrl == null) { + return false + } + + if (StickerUrl.isValidShareLink(linkUrl)) { + return true + } + + val (isLegal, domain) = isLegalUrlInternal(linkUrl) + + if (!isLegal || domain?.matches(INVALID_DOMAINS_REGEX) == true) { + return false + } + + return HttpUrl.parse(linkUrl)?.scheme() == "https" + } + + /** + * Text story link previews must have all valid URL characters, a present and allowed domain, and must have a TLD. + */ + @JvmStatic + fun isValidTextStoryPostPreview(url: String): Boolean { + val (isLegal, domain) = isLegalUrlInternal(url) + + if (!isLegal || domain == null || domain.matches(INVALID_DOMAINS_REGEX)) { + return false + } + + return domain.lastIndexOf('.', domain.lastIndex) != -1 + } + + /** + * A URL is legal if it has all valid URL characters. + */ + @JvmStatic + fun isLegalUrl(url: String): Boolean { + return isLegalUrlInternal(url).isLegal + } + + private fun isLegalUrlInternal(url: String): LegalCharactersResult { + if (ILLEGAL_CHARACTERS_PATTERN.matcher(url).find()) { + return LegalCharactersResult(false) + } + + val matcher = DOMAIN_PATTERN.matcher(url) + if (!matcher.matches()) { + return LegalCharactersResult(false) + } + + val domain = Objects.requireNonNull(matcher.group(2)) + val cleanedDomain = domain.replace("\\.".toRegex(), "") + + return LegalCharactersResult( + isLegal = ALL_ASCII_PATTERN.matcher(cleanedDomain).matches() || ALL_NON_ASCII_PATTERN.matcher(cleanedDomain).matches(), + domain = domain + ) + } + + private data class LegalCharactersResult(val isLegal: Boolean, val domain: String? = null) +} diff --git a/app/src/test/java/org/thoughtcrime/securesms/util/LinkUtilTest_isLegal.java b/app/src/test/java/org/thoughtcrime/securesms/util/LinkUtilTest_isLegal.java index a229fed2e5..6c39cc0c42 100644 --- a/app/src/test/java/org/thoughtcrime/securesms/util/LinkUtilTest_isLegal.java +++ b/app/src/test/java/org/thoughtcrime/securesms/util/LinkUtilTest_isLegal.java @@ -36,7 +36,16 @@ public class LinkUtilTest_isLegal { { "кц.рф\u2500", false }, { "кц.рф\u25AA", false }, { "кц.рф\u25FF", false }, - { "", false } + { "", false }, + { "cool.example", true }, + { "cool.example.com", true }, + { "cool.example.net", true }, + { "cool.example.org", true }, + { "cool.invalid", true }, + { "cool.localhost", true }, + { "localhost", true }, + { "https://localhost", true }, + { "cool.test", true } }); } diff --git a/app/src/test/java/org/thoughtcrime/securesms/util/LinkUtilTest_isValidPreviewUrl.kt b/app/src/test/java/org/thoughtcrime/securesms/util/LinkUtilTest_isValidPreviewUrl.kt new file mode 100644 index 0000000000..06eb7d5754 --- /dev/null +++ b/app/src/test/java/org/thoughtcrime/securesms/util/LinkUtilTest_isValidPreviewUrl.kt @@ -0,0 +1,55 @@ +package org.thoughtcrime.securesms.util + +import junit.framework.TestCase +import org.junit.Test +import org.junit.runner.RunWith +import org.junit.runners.Parameterized + +@RunWith(Parameterized::class) +class LinkUtilTest_isValidPreviewUrl(private val input: String, private val output: Boolean) { + + @Test + fun isLegal() { + TestCase.assertEquals(output, LinkUtil.isValidPreviewUrl(input)) + } + + companion object { + @Parameterized.Parameters + @JvmStatic + fun data(): Collection> { + return listOf( + arrayOf("google.com", false), + arrayOf("foo.google.com", false), + arrayOf("https://foo.google.com", true), + arrayOf("https://foo.google.com.", true), + arrayOf("https://foo.google.com/some/path.html", true), + arrayOf("кц.рф", false), + arrayOf("https://кц.рф/some/path", true), + arrayOf("https://abcdefg.onion", false), + arrayOf("https://abcdefg.i2p", false), + arrayOf("http://кц.com", false), + arrayOf("кц.com", false), + arrayOf("http://asĸ.com", false), + arrayOf("http://foo.кц.рф", false), + arrayOf("кц.рф\u202C", false), + arrayOf("кц.рф\u202D", false), + arrayOf("кц.рф\u202E", false), + arrayOf("кц.рф\u2500", false), + arrayOf("кц.рф\u25AA", false), + arrayOf("кц.рф\u25FF", false), + arrayOf("", false), + arrayOf("https://cool.example", false), + arrayOf("https://cool.example.com", false), + arrayOf("https://cool.example.net", false), + arrayOf("https://cool.example.org", false), + arrayOf("https://cool.invalid", false), + arrayOf("https://cool.localhost", false), + arrayOf("https://localhost", false), + arrayOf("https://cool.test", false), + arrayOf("https://cool.invalid.com", true), + arrayOf("https://cool.localhost.signal.org", true), + arrayOf("https://cool.test.blarg.gov", true) + ) + } + } +}