Refine link preview domain restrictions.

This commit is contained in:
Cody Henthorne 2023-04-21 11:40:16 -04:00 committed by GitHub
parent a917dace6e
commit 8baf07a11c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 150 additions and 90 deletions

View file

@ -63,7 +63,7 @@ class TextStoryPostLinkEntryFragment : KeyboardEntryDialogFragment(
if (linkPreviewState != null) {
val url = linkPreviewState.linkPreview.map { it.url }.orElseGet { linkPreviewState.activeUrlForError }
if (LinkUtil.isLegalUrl(url, false, true)) {
if (LinkUtil.isValidTextStoryPostPreview(url)) {
viewModel.setLinkPreview(url)
dismissAllowingStateLoss()
} else {

View file

@ -1,88 +0,0 @@
package org.thoughtcrime.securesms.util;
import android.text.TextUtils;
import androidx.annotation.NonNull;
import androidx.annotation.Nullable;
import org.signal.core.util.SetUtil;
import org.thoughtcrime.securesms.stickers.StickerUrl;
import java.util.Objects;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import okhttp3.HttpUrl;
public final class LinkUtil {
private static final Pattern DOMAIN_PATTERN = Pattern.compile("^(https?://)?([^/]+).*$");
private static final Pattern ALL_ASCII_PATTERN = Pattern.compile("^[\\x00-\\x7F]*$");
private static final Pattern ALL_NON_ASCII_PATTERN = Pattern.compile("^[^\\x00-\\x7F]*$");
private static final Pattern ILLEGAL_CHARACTERS_PATTERN = Pattern.compile("[\u202C\u202D\u202E\u2500-\u25FF]");
private static final Set<String> INVALID_TOP_LEVEL_DOMAINS = SetUtil.newHashSet("onion", "i2p");
private LinkUtil() {}
/**
* @return True if URL is valid for link previews.
*/
public static boolean isValidPreviewUrl(@Nullable String linkUrl) {
if (linkUrl == null) {
return false;
}
if (StickerUrl.isValidShareLink(linkUrl)) {
return true;
}
HttpUrl url = HttpUrl.parse(linkUrl);
return url != null &&
!TextUtils.isEmpty(url.scheme()) &&
"https".equals(url.scheme()) &&
isLegalUrl(linkUrl, false, false);
}
/**
* @return True if URL is valid, mostly useful for linkifying.
*/
public static boolean isLegalUrl(@NonNull String url) {
return isLegalUrl(url, true, false);
}
public static boolean isLegalUrl(@NonNull String url, boolean skipTopLevelDomainValidation, boolean requireTopLevelDomain) {
if (ILLEGAL_CHARACTERS_PATTERN.matcher(url).find()) {
return false;
}
Matcher matcher = DOMAIN_PATTERN.matcher(url);
if (matcher.matches()) {
String domain = Objects.requireNonNull(matcher.group(2));
String cleanedDomain = domain.replaceAll("\\.", "");
String topLevelDomain = parseTopLevelDomain(domain);
boolean validCharacters = ALL_ASCII_PATTERN.matcher(cleanedDomain).matches() ||
ALL_NON_ASCII_PATTERN.matcher(cleanedDomain).matches();
boolean validTopLevelDomain = (skipTopLevelDomainValidation || !INVALID_TOP_LEVEL_DOMAINS.contains(topLevelDomain)) &&
(!requireTopLevelDomain || topLevelDomain != null);
return validCharacters && validTopLevelDomain;
} else {
return false;
}
}
private static @Nullable String parseTopLevelDomain(@NonNull String domain) {
int periodIndex = domain.lastIndexOf(".");
if (periodIndex >= 0 && periodIndex < domain.length() - 1) {
return domain.substring(periodIndex + 1);
} else {
return null;
}
}
}

View file

@ -0,0 +1,84 @@
package org.thoughtcrime.securesms.util
import okhttp3.HttpUrl
import org.thoughtcrime.securesms.stickers.StickerUrl
import java.util.Objects
import java.util.regex.Pattern
/**
* Utilities for validating various links for multiple situations.
*/
object LinkUtil {
private val DOMAIN_PATTERN = Pattern.compile("^(https?://)?([^/]+).*$")
private val ALL_ASCII_PATTERN = Pattern.compile("^[\\x00-\\x7F]*$")
private val ALL_NON_ASCII_PATTERN = Pattern.compile("^[^\\x00-\\x7F]*$")
private val ILLEGAL_CHARACTERS_PATTERN = Pattern.compile("[\u202C\u202D\u202E\u2500-\u25FF]")
private val INVALID_DOMAINS = listOf("example", "example\\.com", "example\\.net", "example\\.org", "i2p", "invalid", "localhost", "onion", "test")
private val INVALID_DOMAINS_REGEX: Regex = Regex("^(.+\\.)?(${INVALID_DOMAINS.joinToString("|")})\\.?\$")
/**
* Link previews must have all valid URL characters, an allowed domain if present, and must include https://
*/
@JvmStatic
fun isValidPreviewUrl(linkUrl: String?): Boolean {
if (linkUrl == null) {
return false
}
if (StickerUrl.isValidShareLink(linkUrl)) {
return true
}
val (isLegal, domain) = isLegalUrlInternal(linkUrl)
if (!isLegal || domain?.matches(INVALID_DOMAINS_REGEX) == true) {
return false
}
return HttpUrl.parse(linkUrl)?.scheme() == "https"
}
/**
* Text story link previews must have all valid URL characters, a present and allowed domain, and must have a TLD.
*/
@JvmStatic
fun isValidTextStoryPostPreview(url: String): Boolean {
val (isLegal, domain) = isLegalUrlInternal(url)
if (!isLegal || domain == null || domain.matches(INVALID_DOMAINS_REGEX)) {
return false
}
return domain.lastIndexOf('.', domain.lastIndex) != -1
}
/**
* A URL is legal if it has all valid URL characters.
*/
@JvmStatic
fun isLegalUrl(url: String): Boolean {
return isLegalUrlInternal(url).isLegal
}
private fun isLegalUrlInternal(url: String): LegalCharactersResult {
if (ILLEGAL_CHARACTERS_PATTERN.matcher(url).find()) {
return LegalCharactersResult(false)
}
val matcher = DOMAIN_PATTERN.matcher(url)
if (!matcher.matches()) {
return LegalCharactersResult(false)
}
val domain = Objects.requireNonNull(matcher.group(2))
val cleanedDomain = domain.replace("\\.".toRegex(), "")
return LegalCharactersResult(
isLegal = ALL_ASCII_PATTERN.matcher(cleanedDomain).matches() || ALL_NON_ASCII_PATTERN.matcher(cleanedDomain).matches(),
domain = domain
)
}
private data class LegalCharactersResult(val isLegal: Boolean, val domain: String? = null)
}

View file

@ -36,7 +36,16 @@ public class LinkUtilTest_isLegal {
{ "кц.рф\u2500", false },
{ "кц.рф\u25AA", false },
{ "кц.рф\u25FF", false },
{ "", false }
{ "", false },
{ "cool.example", true },
{ "cool.example.com", true },
{ "cool.example.net", true },
{ "cool.example.org", true },
{ "cool.invalid", true },
{ "cool.localhost", true },
{ "localhost", true },
{ "https://localhost", true },
{ "cool.test", true }
});
}

View file

@ -0,0 +1,55 @@
package org.thoughtcrime.securesms.util
import junit.framework.TestCase
import org.junit.Test
import org.junit.runner.RunWith
import org.junit.runners.Parameterized
@RunWith(Parameterized::class)
class LinkUtilTest_isValidPreviewUrl(private val input: String, private val output: Boolean) {
@Test
fun isLegal() {
TestCase.assertEquals(output, LinkUtil.isValidPreviewUrl(input))
}
companion object {
@Parameterized.Parameters
@JvmStatic
fun data(): Collection<Array<Any>> {
return listOf(
arrayOf("google.com", false),
arrayOf("foo.google.com", false),
arrayOf("https://foo.google.com", true),
arrayOf("https://foo.google.com.", true),
arrayOf("https://foo.google.com/some/path.html", true),
arrayOf("кц.рф", false),
arrayOf("https://кц.рф/some/path", true),
arrayOf("https://abcdefg.onion", false),
arrayOf("https://abcdefg.i2p", false),
arrayOf("http://кц.com", false),
arrayOf("кц.com", false),
arrayOf("http://asĸ.com", false),
arrayOf("http://foo.кц.рф", false),
arrayOf("кц.рф\u202C", false),
arrayOf("кц.рф\u202D", false),
arrayOf("кц.рф\u202E", false),
arrayOf("кц.рф\u2500", false),
arrayOf("кц.рф\u25AA", false),
arrayOf("кц.рф\u25FF", false),
arrayOf("", false),
arrayOf("https://cool.example", false),
arrayOf("https://cool.example.com", false),
arrayOf("https://cool.example.net", false),
arrayOf("https://cool.example.org", false),
arrayOf("https://cool.invalid", false),
arrayOf("https://cool.localhost", false),
arrayOf("https://localhost", false),
arrayOf("https://cool.test", false),
arrayOf("https://cool.invalid.com", true),
arrayOf("https://cool.localhost.signal.org", true),
arrayOf("https://cool.test.blarg.gov", true)
)
}
}
}