Skip to content

Commit

Permalink
ahocorasick: improve matching with subdomains
Browse files Browse the repository at this point in the history
The basic idea is to have the following logic:
* pattern "DOMAIN" matches the domain itself (i.e. exact match) *and* any
subdomains (i.e. "ANYTHING.DOMAIN")
* pattern "DOMAIN." matches *also* any string of which it is a prefix
[please note that this kind of match is handy but it is quite
dangerous...]
* pattern "-DOMAIN" matches *also* any string of which it is a postfix

Examples:
* pattern "wikipedia.it":
  * "wikipedia.it" -> OK
  * "foo.wikipedia.it" -> OK
  * "foowikipedia.it" -> NO MATCH
  * "wikipedia.it.com" -> NO MATCH
* pattern "wikipedia.":
  * "wikipedia.it" -> OK
  * "foo.wikipedia.it" -> OK
  * "foowikipedia.it" -> NO MATCH
  * "wikipedia.it.com" -> OK
* pattern "-wikipedia.it":
  * "wikipedia.it" -> NO MATCH
  * "foo.wikipedia.it" -> NO MATCH
  * "0001-wikipedia.it" -> OK
  * "foo.0001-wikipedia.it" -> OK

Bottom line:
* exact match
* prefix with "." (always, implicit)
* prefix with "-" (only if explicitly set)
* postfix with "." (only if explicitly set)

That means that the patterns cannot start with '.' anymore.

Close #2330
  • Loading branch information
IvanNardi committed Feb 28, 2024
1 parent c2b5b48 commit 82f6dc5
Show file tree
Hide file tree
Showing 13 changed files with 527 additions and 464 deletions.
43 changes: 43 additions & 0 deletions example/ndpiReader.c
Original file line number Diff line number Diff line change
Expand Up @@ -5005,6 +5005,48 @@ void automataUnitTest() {
ndpi_free_automa(automa);
}

/* *********************************************** */

/*
 * Test helper: create a domain automa, load the single given pattern
 * (duplicated with ndpi_strdup() before insertion) and finalize it so that
 * it can be queried with ndpi_match_string().
 */
static void *buildDomainAutomaForTest(const char *pattern) {
  void *ac = ndpi_init_automa_domain();

  assert(ac);
  assert(ndpi_add_string_to_automa(ac, ndpi_strdup(pattern)) == 0);
  ndpi_finalize_automa(ac);

  return ac;
}

/*
 * Unit test for the domain-aware automa: three independent automata, each
 * holding one pattern, are probed with hostnames that must or must not match.
 */
void automataDomainsUnitTest() {
  void *ac;

  /* Plain "DOMAIN" pattern: the domain itself and "x.DOMAIN" match;
     other prefixes and extra trailing labels do not */
  ac = buildDomainAutomaForTest("wikipedia.it");
  assert(ndpi_match_string(ac, "wikipedia.it") == 1);
  assert(ndpi_match_string(ac, "foo.wikipedia.it") == 1);
  assert(ndpi_match_string(ac, "foowikipedia.it") == 0);
  assert(ndpi_match_string(ac, "foowikipedia") == 0);
  assert(ndpi_match_string(ac, "-wikipedia.it") == 0);
  assert(ndpi_match_string(ac, "foo-wikipedia.it") == 0);
  assert(ndpi_match_string(ac, "wikipedia.it.com") == 0);
  ndpi_free_automa(ac);

  /* "DOMAIN." pattern (trailing dot): anything following the dot is
     accepted as well, but the dot itself is still required */
  ac = buildDomainAutomaForTest("wikipedia.");
  assert(ndpi_match_string(ac, "wikipedia.it") == 1);
  assert(ndpi_match_string(ac, "foo.wikipedia.it") == 1);
  assert(ndpi_match_string(ac, "foowikipedia.it") == 0);
  assert(ndpi_match_string(ac, "foowikipedia") == 0);
  assert(ndpi_match_string(ac, "-wikipedia.it") == 0);
  assert(ndpi_match_string(ac, "foo-wikipedia.it") == 0);
  assert(ndpi_match_string(ac, "wikipediafoo") == 0);
  assert(ndpi_match_string(ac, "wikipedia.it.com") == 1);
  ndpi_free_automa(ac);

  /* "-DOMAIN" pattern (leading dash): only hostnames carrying the
     "-DOMAIN" postfix match; the bare domain does not */
  ac = buildDomainAutomaForTest("-buy.itunes.apple.com");
  assert(ndpi_match_string(ac, "buy.itunes.apple.com") == 0);
  assert(ndpi_match_string(ac, "p53-buy.itunes.apple.com") == 1);
  assert(ndpi_match_string(ac, "p53buy.itunes.apple.com") == 0);
  assert(ndpi_match_string(ac, "foo.p53-buy.itunes.apple.com") == 1);
  ndpi_free_automa(ac);
}

#endif

/* *********************************************** */
Expand Down Expand Up @@ -5927,6 +5969,7 @@ int main(int argc, char **argv) {
bitmapUnitTest();
filterUnitTest();
automataUnitTest();
automataDomainsUnitTest();
analyzeUnitTest();
ndpi_self_check_host_match(stderr);
analysisUnitTest();
Expand Down
1 change: 1 addition & 0 deletions src/include/ndpi_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -932,6 +932,7 @@ extern "C" {
*
*/
void* ndpi_init_automa(void);
void *ndpi_init_automa_domain(void);

/**
* Free Aho-Corasick automata allocated with ndpi_init_automa();
Expand Down
747 changes: 367 additions & 380 deletions src/lib/ndpi_content_match.c.inc

Large diffs are not rendered by default.

43 changes: 37 additions & 6 deletions src/lib/ndpi_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -853,6 +853,15 @@ void ndpi_self_check_host_match(FILE *error_out) {
u_int32_t i, j;

for(i = 0; host_match[i].string_to_match != NULL; i++) {
if(host_match[i].string_to_match[0] == '.') {
if (error_out != NULL) {
fprintf(error_out,
"[NDPI] INTERNAL ERROR Invalid string detected '%s'. It can not start with '.'\n",
host_match[i].string_to_match);
fprintf(error_out, "\nPlease fix host_match[] in ndpi_content_match.c.inc\n");
}
abort();
}
for(j = 0; host_match[j].string_to_match != NULL; j++) {
if((i != j) && (strcmp(host_match[i].string_to_match, host_match[j].string_to_match) == 0)) {
if (error_out != NULL) {
Expand Down Expand Up @@ -2300,24 +2309,40 @@ int ac_domain_match_handler(AC_MATCH_t *m, AC_TEXT_t *txt, AC_REP_t *match) {
return 1;
}
/* pattern is DOMAIN.NAME and string x.DOMAIN.NAME ? */
if(start > 1 && !ndpi_is_middle_string_char(pattern->astring[0]) && pattern->rep.dot) {
if(start >= 1 && !ndpi_is_middle_string_char(pattern->astring[0])) {
/*
The patch below allows in case of pattern ws.amazon.com
to avoid matching aws.amazon.com whereas a.ws.amazon.com
has to match
*/
if(ndpi_is_middle_string_char(txt->astring[start-1])) {
if(txt->astring[start-1] == '.') {
if(!txt->match.last || txt->match.last->rep.level < pattern->rep.level) {
txt->match.last = pattern; *match = pattern->rep;
MATCH_DEBUG_INFO("[NDPI] Searching: Found domain match (pre). Proto %d \n",pattern->rep.number);
}
}
continue;
}

/* pattern is -DOMAIN.NAME and string x-DOMAIN.NAME ? */
if(start >= 1 && pattern->astring[0] == '-') {
if(txt->astring[start] == '-') {
if(!txt->match.last || txt->match.last->rep.level < pattern->rep.level) {
txt->match.last = pattern; *match = pattern->rep;
MATCH_DEBUG_INFO("[NDPI] Searching: Found domain match. Proto %d \n",pattern->rep.number);
MATCH_DEBUG_INFO("[NDPI] Searching: Found domain match (pre -). Proto %d \n",pattern->rep.number);
}
}
continue;
}

if(!txt->match.last || txt->match.last->rep.level < pattern->rep.level) {
txt->match.last = pattern; *match = pattern->rep;
MATCH_DEBUG_INFO("[NDPI] Searching: matched. Proto %d \n",pattern->rep.number);
/* pattern is DOMAIN. and string DOMAIN.SOMETHING ? or
DOMAIN- and DOMAIN-SOMETHING */
if(start == 0 && ndpi_is_middle_string_char(pattern->astring[pattern->length - 1])) {
if(!txt->match.last || txt->match.last->rep.level < pattern->rep.level) {
txt->match.last = pattern; *match = pattern->rep;
MATCH_DEBUG_INFO("[NDPI] Searching: Found domain match (post). Proto %d \n",pattern->rep.number);
}
continue;
}
}
return 0;
Expand Down Expand Up @@ -3777,6 +3802,12 @@ int ndpi_finalize_initialization(struct ndpi_detection_module_struct *ndpi_str)

/* Wrappers */
/*
 * Allocate a new automa through ac_automata_init(), passing NULL in the
 * argument slot where ndpi_init_automa_domain() passes its match handler.
 * Returns whatever ac_automata_init() returns.
 */
void *ndpi_init_automa(void) {
  void *automa = ac_automata_init(NULL);

  return(automa);
}

/* ****************************************************** */

/*
 * Allocate a new automa through ac_automata_init() with
 * ac_domain_match_handler installed as its match handler.
 * Returns whatever ac_automata_init() returns.
 */
void *ndpi_init_automa_domain(void) {
  void *domain_automa = ac_automata_init(ac_domain_match_handler);

  return(domain_automa);
}

Expand Down
Binary file modified tests/cfgs/default/pcap/sites.pcapng
Binary file not shown.
12 changes: 6 additions & 6 deletions tests/cfgs/default/result/anyconnect-vpn.pcap.out
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ LRU cache tls_cert: 0/11/0 (insert/search/found)
LRU cache mining: 0/8/0 (insert/search/found)
LRU cache msteams: 0/0/0 (insert/search/found)
LRU cache stun_zoom: 0/1/0 (insert/search/found)
Automa host: 69/13 (search/found)
Automa host: 69/15 (search/found)
Automa domain: 69/0 (search/found)
Automa tls cert: 4/0 (search/found)
Automa risk mask: 22/1 (search/found)
Expand All @@ -28,7 +28,7 @@ Patricia protocols: 133/7 (search/found)
Patricia protocols IPv6: 6/0 (search/found)

Unknown 19 1054 2
DNS 30 3350 15
DNS 28 3164 14
HTTP 50 11137 5
MDNS 24 4279 4
NetBIOS 15 1542 1
Expand All @@ -41,12 +41,12 @@ TLS 328 86914 13
ICMPV6 18 2964 2
Slack 29 4979 2
AJP 5 390 1
Apple 2 297 1
Apple 4 483 2
CiscoVPN 3 198 1
ApplePush 6 966 3

Safe 359 93320 15
Acceptable 207 36239 52
Safe 361 93506 16
Acceptable 205 36053 51
Unrated 19 1054 2

JA3 Host Stats:
Expand Down Expand Up @@ -106,7 +106,7 @@ JA3 Host Stats:
50 TCP 10.0.0.227:56865 <-> 10.0.0.149:8008 [proto: 161/CiscoVPN][IP: 0/Unknown][Encrypted][Confidence: Match by port][DPI packets: 3][cat: VPN/2][2 pkts/132 bytes <-> 1 pkts/66 bytes][Goodput ratio: 0/0][0.00 sec][Plen Bins: 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
51 TCP 10.0.0.227:56885 <-> 184.25.56.53:80 [proto: 7/HTTP][IP: 0/Unknown][ClearText][Confidence: Match by port][DPI packets: 3][cat: Web/5][2 pkts/132 bytes <-> 1 pkts/66 bytes][Goodput ratio: 0/0][0.02 sec][Plen Bins: 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
52 UDP 10.0.0.227:61613 <-> 75.75.75.75:53 [proto: 5/DNS][IP: 0/Unknown][ClearText][Confidence: DPI][DPI packets: 2][cat: Network/14][1 pkts/97 bytes <-> 1 pkts/97 bytes][Goodput ratio: 56/56][0.02 sec][Hostname/SNI: lb._dns-sd._udp.0.0.0.10.in-addr.arpa][::][Risk: ** Error Code **][Risk Score: 10][Risk Info: DNS Error Code NXDOMAIN][Plen Bins: 0,100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
53 UDP 10.0.0.227:49781 <-> 75.75.75.75:53 [proto: 5/DNS][IP: 0/Unknown][ClearText][Confidence: DPI][DPI packets: 2][cat: Network/14][1 pkts/69 bytes <-> 1 pkts/117 bytes][Goodput ratio: 39/64][0.02 sec][Hostname/SNI: apple.com][17.178.96.59][Plen Bins: 50,0,50,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
53 UDP 10.0.0.227:49781 <-> 75.75.75.75:53 [proto: 5.140/DNS.Apple][IP: 0/Unknown][ClearText][Confidence: DPI][DPI packets: 2][cat: Network/14][1 pkts/69 bytes <-> 1 pkts/117 bytes][Goodput ratio: 39/64][0.02 sec][Hostname/SNI: apple.com][17.178.96.59][Plen Bins: 50,0,50,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
54 UDP 10.0.0.227:52879 <-> 75.75.76.76:53 [proto: 5/DNS][IP: 0/Unknown][ClearText][Confidence: DPI][DPI packets: 2][cat: Network/14][1 pkts/82 bytes <-> 1 pkts/98 bytes][Goodput ratio: 48/57][0.04 sec][Hostname/SNI: vco.pandion.viasat.com][8.37.102.91][PLAIN TEXT (pandion)][Plen Bins: 0,100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
55 ICMPV6 [fe80::408:3e45:3abc:1552]:0 -> [ff02::16]:0 [proto: 102/ICMPV6][IP: 0/Unknown][ClearText][Confidence: DPI][DPI packets: 1][cat: Network/14][2 pkts/180 bytes -> 0 pkts/0 bytes][Goodput ratio: 22/0][1.02 sec][Plen Bins: 100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
56 UDP 10.0.0.227:51990 <-> 75.75.75.75:53 [proto: 5/DNS][IP: 0/Unknown][ClearText][Confidence: DPI][DPI packets: 2][cat: Network/14][1 pkts/75 bytes <-> 1 pkts/91 bytes][Goodput ratio: 43/53][0.04 sec][Hostname/SNI: mail.viasat.com][8.37.103.196][PLAIN TEXT (viasat)][Plen Bins: 0,100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
Expand Down
6 changes: 3 additions & 3 deletions tests/cfgs/default/result/cachefly.pcapng.out
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@ LRU cache ookla: 0/0/0 (insert/search/found)
LRU cache bittorrent: 0/0/0 (insert/search/found)
LRU cache zoom: 0/0/0 (insert/search/found)
LRU cache stun: 0/0/0 (insert/search/found)
LRU cache tls_cert: 0/2/0 (insert/search/found)
LRU cache tls_cert: 0/0/0 (insert/search/found)
LRU cache mining: 0/0/0 (insert/search/found)
LRU cache msteams: 0/0/0 (insert/search/found)
LRU cache stun_zoom: 0/0/0 (insert/search/found)
Automa host: 44/1 (search/found)
Automa domain: 44/0 (search/found)
Automa host: 1/1 (search/found)
Automa domain: 1/0 (search/found)
Automa tls cert: 0/0 (search/found)
Automa risk mask: 0/0 (search/found)
Automa common alpns: 2/2 (search/found)
Expand Down
Loading

0 comments on commit 82f6dc5

Please sign in to comment.