URL encoded scrubber.
* Replace scrubber and tests. * Improves email regex performance.
This commit is contained in:
parent
02ea99254a
commit
37bcac40bb
4 changed files with 166 additions and 98 deletions
|
@ -353,16 +353,14 @@ public class SubmitLogFragment extends Fragment {
|
|||
Context context = weakContext.get();
|
||||
if (context == null) return null;
|
||||
|
||||
Scrubber scrubber = new Scrubber();
|
||||
|
||||
String newLogs;
|
||||
CharSequence newLogs;
|
||||
try {
|
||||
long t1 = System.currentTimeMillis();
|
||||
String logs = ApplicationContext.getInstance(context).getPersistentLogger().getLogs().get();
|
||||
Log.i(TAG, "Fetch our logs : " + (System.currentTimeMillis() - t1) + " ms");
|
||||
|
||||
long t2 = System.currentTimeMillis();
|
||||
newLogs = scrubber.scrub(logs);
|
||||
newLogs = Scrubber.scrub(logs);
|
||||
Log.i(TAG, "Scrub our logs: " + (System.currentTimeMillis() - t2) + " ms");
|
||||
} catch (InterruptedException | ExecutionException e) {
|
||||
Log.w(TAG, "Failed to retrieve new logs.", e);
|
||||
|
@ -374,7 +372,7 @@ public class SubmitLogFragment extends Fragment {
|
|||
Log.i(TAG, "Fetch logcat: " + (System.currentTimeMillis() - t3) + " ms");
|
||||
|
||||
long t4 = System.currentTimeMillis();
|
||||
String scrubbedLogcat = scrubber.scrub(logcat);
|
||||
CharSequence scrubbedLogcat = Scrubber.scrub(logcat);
|
||||
Log.i(TAG, "Scrub logcat: " + (System.currentTimeMillis() - t4) + " ms");
|
||||
|
||||
|
||||
|
@ -386,7 +384,7 @@ public class SubmitLogFragment extends Fragment {
|
|||
.append("\n\n\n")
|
||||
.append(HEADER_JOBS)
|
||||
.append("\n\n")
|
||||
.append(scrubber.scrub(ApplicationContext.getInstance(context).getJobManager().getDebugInfo()))
|
||||
.append(Scrubber.scrub(ApplicationContext.getInstance(context).getJobManager().getDebugInfo()))
|
||||
.append("\n\n\n");
|
||||
|
||||
if (VERSION.SDK_INT >= 28) {
|
||||
|
|
|
@ -17,54 +17,95 @@
|
|||
|
||||
package org.thoughtcrime.securesms.logsubmit.util;
|
||||
|
||||
import androidx.annotation.NonNull;
|
||||
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
* Scrub data for possibly sensitive information
|
||||
* Scrub data for possibly sensitive information.
|
||||
*/
|
||||
public class Scrubber {
|
||||
private static final String TAG = Scrubber.class.getSimpleName();
|
||||
public final class Scrubber {
|
||||
|
||||
private static final Pattern E164_PATTERN = Pattern.compile("\\+\\d{10,15}");
|
||||
private static final Pattern GROUPID_PATTERN = Pattern.compile("__textsecure_group__![^\\s]+");
|
||||
private static final Pattern EMAIL_PATTERN = Pattern.compile("[^\\s]+@[^\\s]+");
|
||||
|
||||
private static final Pattern[] DEFAULTS = new Pattern[] {
|
||||
E164_PATTERN,
|
||||
GROUPID_PATTERN,
|
||||
EMAIL_PATTERN
|
||||
};
|
||||
|
||||
private final Pattern[] patterns;
|
||||
public Scrubber(Pattern... patterns) {
|
||||
this.patterns = patterns;
|
||||
private Scrubber() {
|
||||
}
|
||||
|
||||
public Scrubber() {
|
||||
this(DEFAULTS);
|
||||
/**
|
||||
* The middle group will be censored.
|
||||
* Handles URL encoded +, %2B
|
||||
*/
|
||||
private static final Pattern E164_PATTERN = Pattern.compile("(\\+|%2B)(\\d{8,13})(\\d{2})");
|
||||
private static final String E164_CENSOR = "*************";
|
||||
|
||||
/**
|
||||
* The second group will be censored.
|
||||
*/
|
||||
private static final Pattern CRUDE_EMAIL_PATTERN = Pattern.compile("\\b([^\\s/])([^\\s/]*@[^\\s]+)");
|
||||
private static final String EMAIL_CENSOR = "...@...";
|
||||
|
||||
/**
|
||||
* The middle group will be censored.
|
||||
*/
|
||||
private static final Pattern GROUP_ID_PATTERN = Pattern.compile("(__)(textsecure_group__![^\\s]+)([^\\s]{2})");
|
||||
private static final String GROUP_ID_CENSOR = "...group...";
|
||||
|
||||
public static CharSequence scrub(@NonNull CharSequence in) {
|
||||
|
||||
in = scrubE164(in);
|
||||
in = scrubEmail(in);
|
||||
in = scrubGroups(in);
|
||||
|
||||
return in;
|
||||
}
|
||||
|
||||
public String scrub(final String in) {
|
||||
String out = in;
|
||||
for (Pattern pattern : patterns) {
|
||||
Matcher matcher = pattern.matcher(out);
|
||||
StringBuilder builder = new StringBuilder();
|
||||
int lastEndingPos = 0;
|
||||
private static CharSequence scrubE164(@NonNull CharSequence in) {
|
||||
return scrub(in,
|
||||
E164_PATTERN,
|
||||
(matcher, output) -> output.append(matcher.group(1))
|
||||
.append(E164_CENSOR, 0, matcher.group(2).length())
|
||||
.append(matcher.group(3)));
|
||||
}
|
||||
|
||||
while (matcher.find()) {
|
||||
builder.append(out.substring(lastEndingPos, matcher.start()));
|
||||
private static CharSequence scrubEmail(@NonNull CharSequence in) {
|
||||
return scrub(in,
|
||||
CRUDE_EMAIL_PATTERN,
|
||||
(matcher, output) -> output.append(matcher.group(1))
|
||||
.append(EMAIL_CENSOR));
|
||||
}
|
||||
|
||||
final String censored = matcher.group().substring(0,1) +
|
||||
new String(new char[matcher.group().length()-3]).replace("\0", "*") +
|
||||
matcher.group().substring(matcher.group().length()-2);
|
||||
builder.append(censored);
|
||||
private static CharSequence scrubGroups(@NonNull CharSequence in) {
|
||||
return scrub(in,
|
||||
GROUP_ID_PATTERN,
|
||||
(matcher, output) -> output.append(matcher.group(1))
|
||||
.append(GROUP_ID_CENSOR)
|
||||
.append(matcher.group(3)));
|
||||
}
|
||||
|
||||
lastEndingPos = matcher.end();
|
||||
}
|
||||
builder.append(out.substring(lastEndingPos));
|
||||
out = builder.toString();
|
||||
private static CharSequence scrub(@NonNull CharSequence in, @NonNull Pattern pattern, @NonNull ProcessMatch processMatch) {
|
||||
final StringBuilder output = new StringBuilder(in.length());
|
||||
final Matcher matcher = pattern.matcher(in);
|
||||
|
||||
int lastEndingPos = 0;
|
||||
|
||||
while (matcher.find()) {
|
||||
output.append(in, lastEndingPos, matcher.start());
|
||||
|
||||
processMatch.scrubMatch(matcher, output);
|
||||
|
||||
lastEndingPos = matcher.end();
|
||||
}
|
||||
return out;
|
||||
|
||||
if (lastEndingPos == 0) {
|
||||
// there were no matches, save copying all the data
|
||||
return in;
|
||||
} else {
|
||||
output.append(in, lastEndingPos, in.length());
|
||||
|
||||
return output;
|
||||
}
|
||||
}
|
||||
|
||||
private interface ProcessMatch {
|
||||
void scrubMatch(@NonNull Matcher matcher, @NonNull StringBuilder output);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,57 +0,0 @@
|
|||
package org.thoughtcrime.securesms.logsubmit;
|
||||
|
||||
import org.junit.Test;
|
||||
import org.thoughtcrime.securesms.logsubmit.util.Scrubber;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
|
||||
public class ScrubberTest {
|
||||
|
||||
@Test
|
||||
public void scrub_phoneNumber_solo() {
|
||||
Scrubber scrubber = new Scrubber();
|
||||
String output = scrubber.scrub("+16101234567");
|
||||
|
||||
assertEquals("+*********67", output);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void scrub_phoneNumber_surrounded() {
|
||||
Scrubber scrubber = new Scrubber();
|
||||
String output = scrubber.scrub("Spider-Man's phone number is +16101234567 -- isn't that crazy?");
|
||||
|
||||
assertEquals("Spider-Man's phone number is +*********67 -- isn't that crazy?", output);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void scrub_email_solo() {
|
||||
Scrubber scrubber = new Scrubber();
|
||||
String output = scrubber.scrub("jonah@dailybugle.com");
|
||||
|
||||
assertEquals("j*****************om", output);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void scrub_email_surrounded() {
|
||||
Scrubber scrubber = new Scrubber();
|
||||
String output = scrubber.scrub("Email tips to jonah@dailybugle.com -- it's your civic duty");
|
||||
|
||||
assertEquals("Email tips to j*****************om -- it's your civic duty", output);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void scrub_groupId_solo() {
|
||||
Scrubber scrubber = new Scrubber();
|
||||
String output = scrubber.scrub("__textsecure_group__!abcdefg1234567890");
|
||||
|
||||
assertEquals("_***********************************90", output);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void scrub_groupId_surrounded() {
|
||||
Scrubber scrubber = new Scrubber();
|
||||
String output = scrubber.scrub("The group id is __textsecure_group__!abcdefg1234567890 and don't forget it");
|
||||
|
||||
assertEquals("The group id is _***********************************90 and don't forget it", output);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,86 @@
|
|||
package org.thoughtcrime.securesms.logsubmit.util;
|
||||
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
import org.junit.runners.Parameterized;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
|
||||
@RunWith(Parameterized.class)
|
||||
public final class ScrubberTest {
|
||||
|
||||
@Parameterized.Parameters
|
||||
public static Collection<Object[]> data() {
|
||||
return Arrays.asList(new Object[][]{
|
||||
|
||||
{ "An E164 number +15551234567",
|
||||
"An E164 number +*********67" },
|
||||
|
||||
{ "A UK number +447700900000",
|
||||
"A UK number +**********00" },
|
||||
|
||||
{ "An avatar filename: file:///data/user/0/org.thoughtcrime.securesms/files/avatars/%2B447700900099",
|
||||
"An avatar filename: file:///data/user/0/org.thoughtcrime.securesms/files/avatars/%2B**********99" },
|
||||
|
||||
{ "Multiple numbers +447700900001 +447700900002",
|
||||
"Multiple numbers +**********01 +**********02" },
|
||||
|
||||
{ "One less than shortest number +155556789",
|
||||
"One less than shortest number +155556789" },
|
||||
|
||||
{ "Shortest number +1555567890",
|
||||
"Shortest number +********90" },
|
||||
|
||||
{ "Longest number +155556789012345",
|
||||
"Longest number +*************45" },
|
||||
|
||||
{ "One more than longest number +1234567890123456",
|
||||
"One more than longest number +*************456" },
|
||||
|
||||
{ "abc@def.com",
|
||||
"a...@..." },
|
||||
|
||||
{ "An email abc@def.com",
|
||||
"An email a...@..." },
|
||||
|
||||
{ "A short email a@def.com",
|
||||
"A short email a...@..." },
|
||||
|
||||
{ "A email with multiple parts before the @ d.c+b.a@mulitpart.domain.com and a multipart domain",
|
||||
"A email with multiple parts before the @ d...@... and a multipart domain" },
|
||||
|
||||
{ "An avatar email filename: file:///data/user/0/org.thoughtcrime.securesms/files/avatars/abc@signal.org",
|
||||
"An avatar email filename: file:///data/user/0/org.thoughtcrime.securesms/files/avatars/a...@..." },
|
||||
|
||||
{ "An email and a number abc@def.com +155556789012345",
|
||||
"An email and a number a...@... +*************45" },
|
||||
|
||||
{ "__textsecure_group__!abcdefg1234567890",
|
||||
"__...group...90" },
|
||||
|
||||
{ "A group id __textsecure_group__!abcdefg0987654321 surrounded with text",
|
||||
"A group id __...group...21 surrounded with text" },
|
||||
|
||||
{ "All patterns in a row __textsecure_group__!abcdefg1234567890 +1234567890123456 abc@def.com with text after",
|
||||
"All patterns in a row __...group...90 +*************456 a...@... with text after"
|
||||
}
|
||||
|
||||
});
|
||||
}
|
||||
|
||||
private final String input;
|
||||
private final String expected;
|
||||
|
||||
public ScrubberTest(String input, String expected) {
|
||||
this.input = input;
|
||||
this.expected = expected;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void scrub() {
|
||||
assertEquals(expected, Scrubber.scrub(input).toString());
|
||||
}
|
||||
}
|
Loading…
Add table
Reference in a new issue