Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add plugin "FilterByURLRegexPlugin" #397

Merged
merged 48 commits into from
Jul 13, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
aa27afe
Initial draft of filter_by_url_regex.py
mikenye Jul 7, 2020
e4ae5fb
Add FilterByURLRegexPlugin
mikenye Jul 7, 2020
502a26a
Fix dictionary key & add logging
mikenye Jul 7, 2020
898f22a
Add proper logging
mikenye Jul 7, 2020
cf5505e
Add better logging
mikenye Jul 7, 2020
a05a80e
Add logging
mikenye Jul 7, 2020
50c833c
move code to handle_client_request
mikenye Jul 7, 2020
46af854
development logging
mikenye Jul 7, 2020
e3d1bb8
development
mikenye Jul 7, 2020
24d9b4b
development
mikenye Jul 7, 2020
f57006a
development
mikenye Jul 7, 2020
026f782
dev
mikenye Jul 7, 2020
a73e95d
dev
mikenye Jul 7, 2020
6f75559
dev
mikenye Jul 7, 2020
0dc83fc
dev
mikenye Jul 7, 2020
663f35a
dev
mikenye Jul 7, 2020
6b05bb9
dev
mikenye Jul 7, 2020
22772b0
dev
mikenye Jul 7, 2020
a8a6458
dev
mikenye Jul 7, 2020
5a657ee
dev
mikenye Jul 7, 2020
bd0b73b
dev
mikenye Jul 7, 2020
c527c05
dev
mikenye Jul 7, 2020
7d47655
Fix blocked log
mikenye Jul 7, 2020
9870b53
Add to FILTER_LIST, some tidy up
mikenye Jul 7, 2020
4c4ab47
Update FILTER_LIST
mikenye Jul 7, 2020
b063938
dev
mikenye Jul 7, 2020
96af345
remove scheme from url
mikenye Jul 7, 2020
56ab718
Add to FILTER_LIST
mikenye Jul 7, 2020
83ff4a5
Add to FILTER_LIST
mikenye Jul 7, 2020
2552a7c
Update FILTER_LIST
mikenye Jul 7, 2020
8b0a0b0
commenting
mikenye Jul 7, 2020
f1785cc
Update FILTER_LIST
mikenye Jul 8, 2020
bd8e232
Merge branch 'develop' into develop
abhinavsingh Jul 10, 2020
6a91149
After autopep8
mikenye Jul 12, 2020
95328b5
Fix Anomalous backslash in string (pep8)
mikenye Jul 12, 2020
c19398f
Address code quality checks - flake8 F401 & W605
mikenye Jul 12, 2020
a8c06c4
Address flake8 errors
mikenye Jul 12, 2020
2254248
Attempt to fix flake8 errors
mikenye Jul 12, 2020
0e4ec15
Merge branch 'develop' into develop
abhinavsingh Jul 12, 2020
d6bdd1a
Merge branch 'develop' into develop
mikenye Jul 13, 2020
f310171
Fix linting issues
mikenye Jul 13, 2020
b125dc5
Address flake8 W292
mikenye Jul 13, 2020
9fb1795
Attempt to create tests
mikenye Jul 13, 2020
dd37f25
Add FilterByURLRegexPlugin
mikenye Jul 13, 2020
8b12c0b
Rename test
mikenye Jul 13, 2020
54d37af
Work on tests
mikenye Jul 13, 2020
19ead4a
Work on tests
mikenye Jul 13, 2020
1aadca0
Work on tests
mikenye Jul 13, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions proxy/plugin/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from .reverse_proxy import ReverseProxyPlugin
from .proxy_pool import ProxyPoolPlugin
from .filter_by_client_ip import FilterByClientIpPlugin
from .filter_by_url_regex import FilterByURLRegexPlugin
from .modify_chunk_response import ModifyChunkResponsePlugin

__all__ = [
Expand All @@ -35,4 +36,5 @@
'ProxyPoolPlugin',
'FilterByClientIpPlugin',
'ModifyChunkResponsePlugin',
'FilterByURLRegexPlugin',
]
136 changes: 136 additions & 0 deletions proxy/plugin/filter_by_url_regex.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
# -*- coding: utf-8 -*-
"""
proxy.py
~~~~~~~~
⚡⚡⚡ Fast, Lightweight, Pluggable, TLS interception capable proxy server focused on
Network monitoring, controls & Application development, testing, debugging.

:copyright: (c) 2013-present by Abhinav Singh and contributors.
:license: BSD, see LICENSE for more details.
"""

import logging

from typing import Optional, List, Dict, Any

from ..http.exception import HttpRequestRejected
from ..http.parser import HttpParser
from ..http.codes import httpStatusCodes
from ..http.proxy import HttpProxyBasePlugin
from ..common.utils import text_

import re

logger = logging.getLogger(__name__)


class FilterByURLRegexPlugin(HttpProxyBasePlugin):
    """Reject requests whose URL matches a configured regular expression.

    The URL checked is the request host followed by the request path
    (no scheme).  Each entry of ``FILTER_LIST`` is tried in order; on the
    first match the request is rejected with that entry's status code.
    """

    # Each entry holds:
    #   'regex'       -- pattern searched (``re.search``) within host + path
    #   'status_code' -- HTTP status returned when the pattern matches
    #   'notes'       -- human readable description, not used by the code
    #
    # NOTE(review): the '.' characters in the host portions are unescaped and
    # therefore match any character -- confirm whether literal dots were
    # intended before tightening these patterns.
    FILTER_LIST: List[Dict[str, Any]] = [
        {
            'regex': b'tpc.googlesyndication.com/simgad/.*',
            'status_code': httpStatusCodes.NOT_FOUND,
            'notes': 'Google image ads',
        },
        {
            'regex': b'tpc.googlesyndication.com/sadbundle/.*',
            'status_code': httpStatusCodes.NOT_FOUND,
            'notes': 'Google animated ad bundles',
        },
        {
            'regex': b'pagead\\d+.googlesyndication.com/.*',
            'status_code': httpStatusCodes.NOT_FOUND,
            'notes': 'Google tracking',
        },
        {
            'regex': b'(www){0,1}.google-analytics.com/r/collect\\?.*',
            'status_code': httpStatusCodes.NOT_FOUND,
            'notes': 'Google tracking',
        },
        {
            'regex': b'(www){0,1}.facebook.com/tr/.*',
            'status_code': httpStatusCodes.NOT_FOUND,
            'notes': 'Facebook tracking',
        },
        {
            'regex': b'tpc.googlesyndication.com/daca_images/simgad/.*',
            'status_code': httpStatusCodes.NOT_FOUND,
            'notes': 'Google image ads',
        },
        {
            'regex': b'.*.2mdn.net/videoplayback/.*',
            'status_code': httpStatusCodes.NOT_FOUND,
            'notes': 'Twitch.tv video ads',
        },
        {
            'regex': b'(www.){0,1}google.com(.*)/pagead/.*',
            'status_code': httpStatusCodes.NOT_FOUND,
            'notes': 'Google ads',
        },
    ]

    def before_upstream_connection(
            self, request: HttpParser) -> Optional[HttpParser]:
        """Return ``request`` unchanged; filtering happens in
        ``handle_client_request``."""
        return request

    def handle_client_request(
            self, request: HttpParser) -> Optional[HttpParser]:
        """Check the request URL against ``FILTER_LIST``.

        :param request: parsed client request.
        :returns: ``request`` unchanged when no rule matches, or when the
            host cannot be determined (an error is logged in that case).
        :raises HttpRequestRejected: on the first matching rule, carrying the
            rule's status code, reason ``b'Blocked'`` and a
            ``Connection: close`` header.
        """
        # Prefer the host parsed from the request line, fall back to the
        # Host header.
        request_host = None
        if request.host:
            request_host = request.host
        elif b'host' in request.headers:
            request_host = request.header(b'host')

        if not request_host:
            logger.error("Cannot determine host")
            return request

        # Build the scheme-less URL.  A missing path is treated as empty so
        # that bytes %-formatting cannot raise TypeError on ``None``
        # (presumably possible for requests without a path -- verify against
        # HttpParser).
        url = b'%s%s' % (
            request_host,
            request.path or b'',
        )
        url_text = text_(url)

        # Rule numbers are 1-based and only used for logging.
        for rule_number, blocked_entry in enumerate(self.FILTER_LIST, start=1):
            if not re.search(text_(blocked_entry['regex']), url_text):
                continue

            # Log that the request has been filtered (lazy formatting, same
            # rendered message as before).
            logger.info(
                "Blocked: %r with status_code '%r' by rule number '%r'",
                url_text,
                blocked_entry['status_code'],
                rule_number,
            )

            # Raising ends the loop; the first matching rule wins.
            raise HttpRequestRejected(
                status_code=blocked_entry['status_code'],
                headers={b'Connection': b'close'},
                reason=b'Blocked',
            )

        return request

    def handle_upstream_chunk(self, chunk: memoryview) -> memoryview:
        """Return the upstream ``chunk`` unmodified."""
        return chunk

    def on_upstream_connection_close(self) -> None:
        """No cleanup is performed when the upstream connection closes."""
        pass
27 changes: 27 additions & 0 deletions tests/plugin/test_http_proxy_plugins.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,3 +254,30 @@ def closed() -> bool:
httpStatusCodes.OK,
reason=b'OK', body=b'Hello from man in the middle')
)

@mock.patch('proxy.http.proxy.server.TcpServerConnection')
def test_filter_by_url_regex_plugin(
        self, mock_server_conn: mock.Mock) -> None:
    """A GET for www.facebook.com/tr/ is answered with 404 'Blocked'."""
    # Client sends a request for the tracking URL.
    self._conn.recv.return_value = build_http_request(
        b'GET', b'http://www.facebook.com/tr/',
        headers={
            b'Host': b'www.facebook.com',
        }
    )

    # A single select() round reporting the client socket as readable.
    client_readable = selectors.SelectorKey(
        fileobj=self._conn,
        fd=self._conn.fileno,
        events=selectors.EVENT_READ,
        data=None)
    self.mock_selector.return_value.select.side_effect = [
        [(client_readable, selectors.EVENT_READ)],
    ]

    self.protocol_handler.run_once()

    # The first buffered reply to the client must be the rejection response.
    queued_reply = self.protocol_handler.client.buffer[0].tobytes()
    rejection = build_http_response(
        status_code=httpStatusCodes.NOT_FOUND,
        reason=b'Blocked',
        headers={b'Connection': b'close'},
    )
    self.assertEqual(queued_reply, rejection)
4 changes: 3 additions & 1 deletion tests/plugin/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from proxy.http.proxy import HttpProxyBasePlugin

from proxy.plugin import ModifyPostDataPlugin, ProposedRestApiPlugin, RedirectToCustomServerPlugin, \
FilterByUpstreamHostPlugin, CacheResponsesPlugin, ManInTheMiddlePlugin
FilterByUpstreamHostPlugin, CacheResponsesPlugin, ManInTheMiddlePlugin, FilterByURLRegexPlugin


def get_plugin_by_test_name(test_name: str) -> Type[HttpProxyBasePlugin]:
Expand All @@ -29,4 +29,6 @@ def get_plugin_by_test_name(test_name: str) -> Type[HttpProxyBasePlugin]:
plugin = CacheResponsesPlugin
elif test_name == 'test_man_in_the_middle_plugin':
plugin = ManInTheMiddlePlugin
elif test_name == 'test_filter_by_url_regex_plugin':
plugin = FilterByURLRegexPlugin
return plugin