Disallow some Unicode sequences in link previews.

This commit is contained in:
Greyson Parrelli 2022-03-25 14:34:21 -04:00
parent f2046c3c05
commit 72777bc6cd
2 changed files with 11 additions and 0 deletions

View file

@ -39,6 +39,7 @@ public final class LinkPreviewUtil {
// Optional "http://" or "https://" scheme (group 1), then the run of characters up to the
// first '/' (group 2), then the remainder of the string.
private static final Pattern DOMAIN_PATTERN = Pattern.compile("^(https?://)?([^/]+).*$");
// Anchored pattern for text made up solely of ASCII characters (U+0000-U+007F).
private static final Pattern ALL_ASCII_PATTERN = Pattern.compile("^[\\x00-\\x7F]*$");
// Anchored pattern for text containing no ASCII characters at all.
private static final Pattern ALL_NON_ASCII_PATTERN = Pattern.compile("^[^\\x00-\\x7F]*$");
// Characters that are never allowed anywhere in a previewable URL:
//   U+202C POP DIRECTIONAL FORMATTING, U+202D LEFT-TO-RIGHT OVERRIDE,
//   U+202E RIGHT-TO-LEFT OVERRIDE, and U+2500-U+25FF (the Box Drawing, Block Elements
//   and Geometric Shapes blocks).
// Unanchored character class, so it is meant to be used with find() rather than matches().
private static final Pattern ILLEGAL_CHARACTERS_PATTERN = Pattern.compile("[\u202C\u202D\u202E\u2500-\u25FF]");
// A <meta ...> tag whose double-quoted property attribute starts with "og:"; group 1 is the
// property name following the "og:" prefix.
private static final Pattern OPEN_GRAPH_TAG_PATTERN = Pattern.compile("<\\s*meta[^>]*property\\s*=\\s*\"\\s*og:([^\"]+)\"[^>]*/?\\s*>");
// Same shape as the Open Graph pattern, but for properties prefixed with "article:".
private static final Pattern ARTICLE_TAG_PATTERN = Pattern.compile("<\\s*meta[^>]*property\\s*=\\s*\"\\s*article:([^\"]+)\"[^>]*/?\\s*>");
// A double-quoted content="..." attribute; group 1 is the (possibly empty) attribute value.
private static final Pattern OPEN_GRAPH_CONTENT_PATTERN = Pattern.compile("content\\s*=\\s*\"([^\"]*)\"");
@ -80,6 +81,10 @@ public final class LinkPreviewUtil {
}
public static boolean isLegalUrl(@NonNull String url) {
if (ILLEGAL_CHARACTERS_PATTERN.matcher(url).find()) {
return false;
}
Matcher matcher = DOMAIN_PATTERN.matcher(url);
if (matcher.matches()) {

View file

@ -30,6 +30,12 @@ public class LinkPreviewUtilTest_isLegal {
{ "http://foo.кц.рф", false },
{ "https://abcdefg.onion", false },
{ "https://abcdefg.i2p", false },
{ "кц.рф\u202C", false },
{ "кц.рф\u202D", false },
{ "кц.рф\u202E", false },
{ "кц.рф\u2500", false },
{ "кц.рф\u25AA", false },
{ "кц.рф\u25FF", false },
{ "", false }
});
}