URL encoded scrubber.

* Replace scrubber and tests.

* Improves email regex performance.
This commit is contained in:
Alan Evans 2019-08-06 16:02:12 -04:00 committed by Greyson Parrelli
parent 02ea99254a
commit 37bcac40bb
4 changed files with 166 additions and 98 deletions

View file

@ -353,16 +353,14 @@ public class SubmitLogFragment extends Fragment {
Context context = weakContext.get();
if (context == null) return null;
Scrubber scrubber = new Scrubber();
String newLogs;
CharSequence newLogs;
try {
long t1 = System.currentTimeMillis();
String logs = ApplicationContext.getInstance(context).getPersistentLogger().getLogs().get();
Log.i(TAG, "Fetch our logs : " + (System.currentTimeMillis() - t1) + " ms");
long t2 = System.currentTimeMillis();
newLogs = scrubber.scrub(logs);
newLogs = Scrubber.scrub(logs);
Log.i(TAG, "Scrub our logs: " + (System.currentTimeMillis() - t2) + " ms");
} catch (InterruptedException | ExecutionException e) {
Log.w(TAG, "Failed to retrieve new logs.", e);
@ -374,7 +372,7 @@ public class SubmitLogFragment extends Fragment {
Log.i(TAG, "Fetch logcat: " + (System.currentTimeMillis() - t3) + " ms");
long t4 = System.currentTimeMillis();
String scrubbedLogcat = scrubber.scrub(logcat);
CharSequence scrubbedLogcat = Scrubber.scrub(logcat);
Log.i(TAG, "Scrub logcat: " + (System.currentTimeMillis() - t4) + " ms");
@ -386,7 +384,7 @@ public class SubmitLogFragment extends Fragment {
.append("\n\n\n")
.append(HEADER_JOBS)
.append("\n\n")
.append(scrubber.scrub(ApplicationContext.getInstance(context).getJobManager().getDebugInfo()))
.append(Scrubber.scrub(ApplicationContext.getInstance(context).getJobManager().getDebugInfo()))
.append("\n\n\n");
if (VERSION.SDK_INT >= 28) {

View file

@ -17,54 +17,95 @@
package org.thoughtcrime.securesms.logsubmit.util;
import androidx.annotation.NonNull;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* Scrub data for possibly sensitive information
* Scrub data for possibly sensitive information.
*/
public class Scrubber {
private static final String TAG = Scrubber.class.getSimpleName();
public final class Scrubber {
private static final Pattern E164_PATTERN = Pattern.compile("\\+\\d{10,15}");
private static final Pattern GROUPID_PATTERN = Pattern.compile("__textsecure_group__![^\\s]+");
private static final Pattern EMAIL_PATTERN = Pattern.compile("[^\\s]+@[^\\s]+");
private static final Pattern[] DEFAULTS = new Pattern[] {
E164_PATTERN,
GROUPID_PATTERN,
EMAIL_PATTERN
};
private final Pattern[] patterns;
public Scrubber(Pattern... patterns) {
this.patterns = patterns;
private Scrubber() {
}
public Scrubber() {
this(DEFAULTS);
/**
* The middle group will be censored.
* Handles URL encoded +, %2B
*/
private static final Pattern E164_PATTERN = Pattern.compile("(\\+|%2B)(\\d{8,13})(\\d{2})");
private static final String E164_CENSOR = "*************";
/**
* The second group will be censored.
*/
private static final Pattern CRUDE_EMAIL_PATTERN = Pattern.compile("\\b([^\\s/])([^\\s/]*@[^\\s]+)");
private static final String EMAIL_CENSOR = "...@...";
/**
* The middle group will be censored.
*/
private static final Pattern GROUP_ID_PATTERN = Pattern.compile("(__)(textsecure_group__![^\\s]+)([^\\s]{2})");
private static final String GROUP_ID_CENSOR = "...group...";
public static CharSequence scrub(@NonNull CharSequence in) {
in = scrubE164(in);
in = scrubEmail(in);
in = scrubGroups(in);
return in;
}
public String scrub(final String in) {
String out = in;
for (Pattern pattern : patterns) {
Matcher matcher = pattern.matcher(out);
StringBuilder builder = new StringBuilder();
int lastEndingPos = 0;
private static CharSequence scrubE164(@NonNull CharSequence in) {
return scrub(in,
E164_PATTERN,
(matcher, output) -> output.append(matcher.group(1))
.append(E164_CENSOR, 0, matcher.group(2).length())
.append(matcher.group(3)));
}
while (matcher.find()) {
builder.append(out.substring(lastEndingPos, matcher.start()));
private static CharSequence scrubEmail(@NonNull CharSequence in) {
return scrub(in,
CRUDE_EMAIL_PATTERN,
(matcher, output) -> output.append(matcher.group(1))
.append(EMAIL_CENSOR));
}
final String censored = matcher.group().substring(0,1) +
new String(new char[matcher.group().length()-3]).replace("\0", "*") +
matcher.group().substring(matcher.group().length()-2);
builder.append(censored);
private static CharSequence scrubGroups(@NonNull CharSequence in) {
return scrub(in,
GROUP_ID_PATTERN,
(matcher, output) -> output.append(matcher.group(1))
.append(GROUP_ID_CENSOR)
.append(matcher.group(3)));
}
lastEndingPos = matcher.end();
}
builder.append(out.substring(lastEndingPos));
out = builder.toString();
private static CharSequence scrub(@NonNull CharSequence in, @NonNull Pattern pattern, @NonNull ProcessMatch processMatch) {
final StringBuilder output = new StringBuilder(in.length());
final Matcher matcher = pattern.matcher(in);
int lastEndingPos = 0;
while (matcher.find()) {
output.append(in, lastEndingPos, matcher.start());
processMatch.scrubMatch(matcher, output);
lastEndingPos = matcher.end();
}
return out;
if (lastEndingPos == 0) {
// there were no matches, save copying all the data
return in;
} else {
output.append(in, lastEndingPos, in.length());
return output;
}
}
private interface ProcessMatch {
void scrubMatch(@NonNull Matcher matcher, @NonNull StringBuilder output);
}
}

View file

@ -1,57 +0,0 @@
package org.thoughtcrime.securesms.logsubmit;
import org.junit.Test;
import org.thoughtcrime.securesms.logsubmit.util.Scrubber;
import static org.junit.Assert.assertEquals;
public class ScrubberTest {
@Test
public void scrub_phoneNumber_solo() {
Scrubber scrubber = new Scrubber();
String output = scrubber.scrub("+16101234567");
assertEquals("+*********67", output);
}
@Test
public void scrub_phoneNumber_surrounded() {
Scrubber scrubber = new Scrubber();
String output = scrubber.scrub("Spider-Man's phone number is +16101234567 -- isn't that crazy?");
assertEquals("Spider-Man's phone number is +*********67 -- isn't that crazy?", output);
}
@Test
public void scrub_email_solo() {
Scrubber scrubber = new Scrubber();
String output = scrubber.scrub("jonah@dailybugle.com");
assertEquals("j*****************om", output);
}
@Test
public void scrub_email_surrounded() {
Scrubber scrubber = new Scrubber();
String output = scrubber.scrub("Email tips to jonah@dailybugle.com -- it's your civic duty");
assertEquals("Email tips to j*****************om -- it's your civic duty", output);
}
@Test
public void scrub_groupId_solo() {
Scrubber scrubber = new Scrubber();
String output = scrubber.scrub("__textsecure_group__!abcdefg1234567890");
assertEquals("_***********************************90", output);
}
@Test
public void scrub_groupId_surrounded() {
Scrubber scrubber = new Scrubber();
String output = scrubber.scrub("The group id is __textsecure_group__!abcdefg1234567890 and don't forget it");
assertEquals("The group id is _***********************************90 and don't forget it", output);
}
}

View file

@ -0,0 +1,86 @@
package org.thoughtcrime.securesms.logsubmit.util;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import java.util.Arrays;
import java.util.Collection;
import static org.junit.Assert.assertEquals;
@RunWith(Parameterized.class)
public final class ScrubberTest {
@Parameterized.Parameters
public static Collection<Object[]> data() {
return Arrays.asList(new Object[][]{
{ "An E164 number +15551234567",
"An E164 number +*********67" },
{ "A UK number +447700900000",
"A UK number +**********00" },
{ "An avatar filename: file:///data/user/0/org.thoughtcrime.securesms/files/avatars/%2B447700900099",
"An avatar filename: file:///data/user/0/org.thoughtcrime.securesms/files/avatars/%2B**********99" },
{ "Multiple numbers +447700900001 +447700900002",
"Multiple numbers +**********01 +**********02" },
{ "One less than shortest number +155556789",
"One less than shortest number +155556789" },
{ "Shortest number +1555567890",
"Shortest number +********90" },
{ "Longest number +155556789012345",
"Longest number +*************45" },
{ "One more than longest number +1234567890123456",
"One more than longest number +*************456" },
{ "abc@def.com",
"a...@..." },
{ "An email abc@def.com",
"An email a...@..." },
{ "A short email a@def.com",
"A short email a...@..." },
{ "A email with multiple parts before the @ d.c+b.a@mulitpart.domain.com and a multipart domain",
"A email with multiple parts before the @ d...@... and a multipart domain" },
{ "An avatar email filename: file:///data/user/0/org.thoughtcrime.securesms/files/avatars/abc@signal.org",
"An avatar email filename: file:///data/user/0/org.thoughtcrime.securesms/files/avatars/a...@..." },
{ "An email and a number abc@def.com +155556789012345",
"An email and a number a...@... +*************45" },
{ "__textsecure_group__!abcdefg1234567890",
"__...group...90" },
{ "A group id __textsecure_group__!abcdefg0987654321 surrounded with text",
"A group id __...group...21 surrounded with text" },
{ "All patterns in a row __textsecure_group__!abcdefg1234567890 +1234567890123456 abc@def.com with text after",
"All patterns in a row __...group...90 +*************456 a...@... with text after"
}
});
}
private final String input;
private final String expected;
public ScrubberTest(String input, String expected) {
this.input = input;
this.expected = expected;
}
@Test
public void scrub() {
assertEquals(expected, Scrubber.scrub(input).toString());
}
}