URL-encoded scrubber.
* Replace scrubber and tests.
* Improves email regex performance.
This commit is contained in:
parent
02ea99254a
commit
37bcac40bb
4 changed files with 166 additions and 98 deletions
|
@ -353,16 +353,14 @@ public class SubmitLogFragment extends Fragment {
|
||||||
Context context = weakContext.get();
|
Context context = weakContext.get();
|
||||||
if (context == null) return null;
|
if (context == null) return null;
|
||||||
|
|
||||||
Scrubber scrubber = new Scrubber();
|
CharSequence newLogs;
|
||||||
|
|
||||||
String newLogs;
|
|
||||||
try {
|
try {
|
||||||
long t1 = System.currentTimeMillis();
|
long t1 = System.currentTimeMillis();
|
||||||
String logs = ApplicationContext.getInstance(context).getPersistentLogger().getLogs().get();
|
String logs = ApplicationContext.getInstance(context).getPersistentLogger().getLogs().get();
|
||||||
Log.i(TAG, "Fetch our logs : " + (System.currentTimeMillis() - t1) + " ms");
|
Log.i(TAG, "Fetch our logs : " + (System.currentTimeMillis() - t1) + " ms");
|
||||||
|
|
||||||
long t2 = System.currentTimeMillis();
|
long t2 = System.currentTimeMillis();
|
||||||
newLogs = scrubber.scrub(logs);
|
newLogs = Scrubber.scrub(logs);
|
||||||
Log.i(TAG, "Scrub our logs: " + (System.currentTimeMillis() - t2) + " ms");
|
Log.i(TAG, "Scrub our logs: " + (System.currentTimeMillis() - t2) + " ms");
|
||||||
} catch (InterruptedException | ExecutionException e) {
|
} catch (InterruptedException | ExecutionException e) {
|
||||||
Log.w(TAG, "Failed to retrieve new logs.", e);
|
Log.w(TAG, "Failed to retrieve new logs.", e);
|
||||||
|
@ -374,7 +372,7 @@ public class SubmitLogFragment extends Fragment {
|
||||||
Log.i(TAG, "Fetch logcat: " + (System.currentTimeMillis() - t3) + " ms");
|
Log.i(TAG, "Fetch logcat: " + (System.currentTimeMillis() - t3) + " ms");
|
||||||
|
|
||||||
long t4 = System.currentTimeMillis();
|
long t4 = System.currentTimeMillis();
|
||||||
String scrubbedLogcat = scrubber.scrub(logcat);
|
CharSequence scrubbedLogcat = Scrubber.scrub(logcat);
|
||||||
Log.i(TAG, "Scrub logcat: " + (System.currentTimeMillis() - t4) + " ms");
|
Log.i(TAG, "Scrub logcat: " + (System.currentTimeMillis() - t4) + " ms");
|
||||||
|
|
||||||
|
|
||||||
|
@ -386,7 +384,7 @@ public class SubmitLogFragment extends Fragment {
|
||||||
.append("\n\n\n")
|
.append("\n\n\n")
|
||||||
.append(HEADER_JOBS)
|
.append(HEADER_JOBS)
|
||||||
.append("\n\n")
|
.append("\n\n")
|
||||||
.append(scrubber.scrub(ApplicationContext.getInstance(context).getJobManager().getDebugInfo()))
|
.append(Scrubber.scrub(ApplicationContext.getInstance(context).getJobManager().getDebugInfo()))
|
||||||
.append("\n\n\n");
|
.append("\n\n\n");
|
||||||
|
|
||||||
if (VERSION.SDK_INT >= 28) {
|
if (VERSION.SDK_INT >= 28) {
|
||||||
|
|
|
@ -17,54 +17,95 @@
|
||||||
|
|
||||||
package org.thoughtcrime.securesms.logsubmit.util;
|
package org.thoughtcrime.securesms.logsubmit.util;
|
||||||
|
|
||||||
|
import androidx.annotation.NonNull;
|
||||||
|
|
||||||
import java.util.regex.Matcher;
|
import java.util.regex.Matcher;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Scrub data for possibly sensitive information
|
* Scrub data for possibly sensitive information.
|
||||||
*/
|
*/
|
||||||
public class Scrubber {
|
public final class Scrubber {
|
||||||
private static final String TAG = Scrubber.class.getSimpleName();
|
|
||||||
|
|
||||||
private static final Pattern E164_PATTERN = Pattern.compile("\\+\\d{10,15}");
|
private Scrubber() {
|
||||||
private static final Pattern GROUPID_PATTERN = Pattern.compile("__textsecure_group__![^\\s]+");
|
|
||||||
private static final Pattern EMAIL_PATTERN = Pattern.compile("[^\\s]+@[^\\s]+");
|
|
||||||
|
|
||||||
private static final Pattern[] DEFAULTS = new Pattern[] {
|
|
||||||
E164_PATTERN,
|
|
||||||
GROUPID_PATTERN,
|
|
||||||
EMAIL_PATTERN
|
|
||||||
};
|
|
||||||
|
|
||||||
private final Pattern[] patterns;
|
|
||||||
public Scrubber(Pattern... patterns) {
|
|
||||||
this.patterns = patterns;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public Scrubber() {
|
/**
|
||||||
this(DEFAULTS);
|
* The middle group will be censored.
|
||||||
|
* Also handles the URL-encoded form of +, i.e. %2B.
|
||||||
|
*/
|
||||||
|
private static final Pattern E164_PATTERN = Pattern.compile("(\\+|%2B)(\\d{8,13})(\\d{2})");
|
||||||
|
private static final String E164_CENSOR = "*************";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The second group will be censored.
|
||||||
|
*/
|
||||||
|
private static final Pattern CRUDE_EMAIL_PATTERN = Pattern.compile("\\b([^\\s/])([^\\s/]*@[^\\s]+)");
|
||||||
|
private static final String EMAIL_CENSOR = "...@...";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The middle group will be censored.
|
||||||
|
*/
|
||||||
|
private static final Pattern GROUP_ID_PATTERN = Pattern.compile("(__)(textsecure_group__![^\\s]+)([^\\s]{2})");
|
||||||
|
private static final String GROUP_ID_CENSOR = "...group...";
|
||||||
|
|
||||||
|
public static CharSequence scrub(@NonNull CharSequence in) {
|
||||||
|
|
||||||
|
in = scrubE164(in);
|
||||||
|
in = scrubEmail(in);
|
||||||
|
in = scrubGroups(in);
|
||||||
|
|
||||||
|
return in;
|
||||||
}
|
}
|
||||||
|
|
||||||
public String scrub(final String in) {
|
private static CharSequence scrubE164(@NonNull CharSequence in) {
|
||||||
String out = in;
|
return scrub(in,
|
||||||
for (Pattern pattern : patterns) {
|
E164_PATTERN,
|
||||||
Matcher matcher = pattern.matcher(out);
|
(matcher, output) -> output.append(matcher.group(1))
|
||||||
StringBuilder builder = new StringBuilder();
|
.append(E164_CENSOR, 0, matcher.group(2).length())
|
||||||
int lastEndingPos = 0;
|
.append(matcher.group(3)));
|
||||||
|
}
|
||||||
|
|
||||||
while (matcher.find()) {
|
private static CharSequence scrubEmail(@NonNull CharSequence in) {
|
||||||
builder.append(out.substring(lastEndingPos, matcher.start()));
|
return scrub(in,
|
||||||
|
CRUDE_EMAIL_PATTERN,
|
||||||
|
(matcher, output) -> output.append(matcher.group(1))
|
||||||
|
.append(EMAIL_CENSOR));
|
||||||
|
}
|
||||||
|
|
||||||
final String censored = matcher.group().substring(0,1) +
|
private static CharSequence scrubGroups(@NonNull CharSequence in) {
|
||||||
new String(new char[matcher.group().length()-3]).replace("\0", "*") +
|
return scrub(in,
|
||||||
matcher.group().substring(matcher.group().length()-2);
|
GROUP_ID_PATTERN,
|
||||||
builder.append(censored);
|
(matcher, output) -> output.append(matcher.group(1))
|
||||||
|
.append(GROUP_ID_CENSOR)
|
||||||
|
.append(matcher.group(3)));
|
||||||
|
}
|
||||||
|
|
||||||
lastEndingPos = matcher.end();
|
private static CharSequence scrub(@NonNull CharSequence in, @NonNull Pattern pattern, @NonNull ProcessMatch processMatch) {
|
||||||
}
|
final StringBuilder output = new StringBuilder(in.length());
|
||||||
builder.append(out.substring(lastEndingPos));
|
final Matcher matcher = pattern.matcher(in);
|
||||||
out = builder.toString();
|
|
||||||
|
int lastEndingPos = 0;
|
||||||
|
|
||||||
|
while (matcher.find()) {
|
||||||
|
output.append(in, lastEndingPos, matcher.start());
|
||||||
|
|
||||||
|
processMatch.scrubMatch(matcher, output);
|
||||||
|
|
||||||
|
lastEndingPos = matcher.end();
|
||||||
}
|
}
|
||||||
return out;
|
|
||||||
|
if (lastEndingPos == 0) {
|
||||||
|
// There were no matches; return the input as-is to avoid copying all the data.
|
||||||
|
return in;
|
||||||
|
} else {
|
||||||
|
output.append(in, lastEndingPos, in.length());
|
||||||
|
|
||||||
|
return output;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private interface ProcessMatch {
|
||||||
|
void scrubMatch(@NonNull Matcher matcher, @NonNull StringBuilder output);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,57 +0,0 @@
|
||||||
package org.thoughtcrime.securesms.logsubmit;
|
|
||||||
|
|
||||||
import org.junit.Test;
|
|
||||||
import org.thoughtcrime.securesms.logsubmit.util.Scrubber;
|
|
||||||
|
|
||||||
import static org.junit.Assert.assertEquals;
|
|
||||||
|
|
||||||
public class ScrubberTest {
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void scrub_phoneNumber_solo() {
|
|
||||||
Scrubber scrubber = new Scrubber();
|
|
||||||
String output = scrubber.scrub("+16101234567");
|
|
||||||
|
|
||||||
assertEquals("+*********67", output);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void scrub_phoneNumber_surrounded() {
|
|
||||||
Scrubber scrubber = new Scrubber();
|
|
||||||
String output = scrubber.scrub("Spider-Man's phone number is +16101234567 -- isn't that crazy?");
|
|
||||||
|
|
||||||
assertEquals("Spider-Man's phone number is +*********67 -- isn't that crazy?", output);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void scrub_email_solo() {
|
|
||||||
Scrubber scrubber = new Scrubber();
|
|
||||||
String output = scrubber.scrub("jonah@dailybugle.com");
|
|
||||||
|
|
||||||
assertEquals("j*****************om", output);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void scrub_email_surrounded() {
|
|
||||||
Scrubber scrubber = new Scrubber();
|
|
||||||
String output = scrubber.scrub("Email tips to jonah@dailybugle.com -- it's your civic duty");
|
|
||||||
|
|
||||||
assertEquals("Email tips to j*****************om -- it's your civic duty", output);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void scrub_groupId_solo() {
|
|
||||||
Scrubber scrubber = new Scrubber();
|
|
||||||
String output = scrubber.scrub("__textsecure_group__!abcdefg1234567890");
|
|
||||||
|
|
||||||
assertEquals("_***********************************90", output);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void scrub_groupId_surrounded() {
|
|
||||||
Scrubber scrubber = new Scrubber();
|
|
||||||
String output = scrubber.scrub("The group id is __textsecure_group__!abcdefg1234567890 and don't forget it");
|
|
||||||
|
|
||||||
assertEquals("The group id is _***********************************90 and don't forget it", output);
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -0,0 +1,86 @@
|
||||||
|
package org.thoughtcrime.securesms.logsubmit.util;
|
||||||
|
|
||||||
|
import org.junit.Test;
|
||||||
|
import org.junit.runner.RunWith;
|
||||||
|
import org.junit.runners.Parameterized;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Collection;
|
||||||
|
|
||||||
|
import static org.junit.Assert.assertEquals;
|
||||||
|
|
||||||
|
@RunWith(Parameterized.class)
|
||||||
|
public final class ScrubberTest {
|
||||||
|
|
||||||
|
@Parameterized.Parameters
|
||||||
|
public static Collection<Object[]> data() {
|
||||||
|
return Arrays.asList(new Object[][]{
|
||||||
|
|
||||||
|
{ "An E164 number +15551234567",
|
||||||
|
"An E164 number +*********67" },
|
||||||
|
|
||||||
|
{ "A UK number +447700900000",
|
||||||
|
"A UK number +**********00" },
|
||||||
|
|
||||||
|
{ "An avatar filename: file:///data/user/0/org.thoughtcrime.securesms/files/avatars/%2B447700900099",
|
||||||
|
"An avatar filename: file:///data/user/0/org.thoughtcrime.securesms/files/avatars/%2B**********99" },
|
||||||
|
|
||||||
|
{ "Multiple numbers +447700900001 +447700900002",
|
||||||
|
"Multiple numbers +**********01 +**********02" },
|
||||||
|
|
||||||
|
{ "One less than shortest number +155556789",
|
||||||
|
"One less than shortest number +155556789" },
|
||||||
|
|
||||||
|
{ "Shortest number +1555567890",
|
||||||
|
"Shortest number +********90" },
|
||||||
|
|
||||||
|
{ "Longest number +155556789012345",
|
||||||
|
"Longest number +*************45" },
|
||||||
|
|
||||||
|
{ "One more than longest number +1234567890123456",
|
||||||
|
"One more than longest number +*************456" },
|
||||||
|
|
||||||
|
{ "abc@def.com",
|
||||||
|
"a...@..." },
|
||||||
|
|
||||||
|
{ "An email abc@def.com",
|
||||||
|
"An email a...@..." },
|
||||||
|
|
||||||
|
{ "A short email a@def.com",
|
||||||
|
"A short email a...@..." },
|
||||||
|
|
||||||
|
{ "A email with multiple parts before the @ d.c+b.a@mulitpart.domain.com and a multipart domain",
|
||||||
|
"A email with multiple parts before the @ d...@... and a multipart domain" },
|
||||||
|
|
||||||
|
{ "An avatar email filename: file:///data/user/0/org.thoughtcrime.securesms/files/avatars/abc@signal.org",
|
||||||
|
"An avatar email filename: file:///data/user/0/org.thoughtcrime.securesms/files/avatars/a...@..." },
|
||||||
|
|
||||||
|
{ "An email and a number abc@def.com +155556789012345",
|
||||||
|
"An email and a number a...@... +*************45" },
|
||||||
|
|
||||||
|
{ "__textsecure_group__!abcdefg1234567890",
|
||||||
|
"__...group...90" },
|
||||||
|
|
||||||
|
{ "A group id __textsecure_group__!abcdefg0987654321 surrounded with text",
|
||||||
|
"A group id __...group...21 surrounded with text" },
|
||||||
|
|
||||||
|
{ "All patterns in a row __textsecure_group__!abcdefg1234567890 +1234567890123456 abc@def.com with text after",
|
||||||
|
"All patterns in a row __...group...90 +*************456 a...@... with text after"
|
||||||
|
}
|
||||||
|
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
private final String input;
|
||||||
|
private final String expected;
|
||||||
|
|
||||||
|
public ScrubberTest(String input, String expected) {
|
||||||
|
this.input = input;
|
||||||
|
this.expected = expected;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void scrub() {
|
||||||
|
assertEquals(expected, Scrubber.scrub(input).toString());
|
||||||
|
}
|
||||||
|
}
|
Loading…
Add table
Reference in a new issue