From 0739db60535b1818edba9128aa4dac551084cfca Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Fri, 12 Nov 2021 03:35:23 +0530 Subject: [PATCH] Add `--proxy-pool` flag (#727) * Add `--proxy-pool` flag * lint checks --- README.md | 103 ++++++++++++++++++++----------------- proxy/plugin/proxy_pool.py | 29 +++++++---- 2 files changed, 74 insertions(+), 58 deletions(-) diff --git a/README.md b/README.md index 5fb7eb2046..07cc131586 100644 --- a/README.md +++ b/README.md @@ -154,7 +154,7 @@ 99% in 0.0185 secs Status code distribution: - [200] 100000 responses + [200] 10000 responses ``` - Lightweight @@ -695,9 +695,6 @@ Response body `Hello from man in the middle` is sent by our plugin. Forward incoming proxy requests to a set of upstream proxy servers. -By default, `ProxyPoolPlugin` is hard-coded to use -`localhost:9000` and `localhost:9001` as upstream proxy server. - Let's start upstream proxies first. Start `proxy.py` on port `9000` and `9001` @@ -710,11 +707,14 @@ Start `proxy.py` on port `9000` and `9001` ❯ proxy --port 9001 ``` -Now, start `proxy.py` with `ProxyPoolPlugin` (on default `8899` port): +Now, start `proxy.py` with `ProxyPoolPlugin` (on default `8899` port), +pointing to our upstream proxies at `9000` and `9001` port. ```console ❯ proxy \ - --plugins proxy.plugin.ProxyPoolPlugin + --plugins proxy.plugin.ProxyPoolPlugin \ + --proxy-pool localhost:9000 \ + --proxy-pool localhost:9001 ``` Make a curl request via `8899` proxy: @@ -1955,10 +1955,11 @@ usage: proxy [-h] [--enable-events] [--enable-conn-pool] [--threadless] [--threa [--cert-file CERT_FILE] [--disable-headers DISABLE_HEADERS] [--server-recvbuf-size SERVER_RECVBUF_SIZE] [--basic-auth BASIC_AUTH] [--cache-dir CACHE_DIR] - [--filtered-upstream-hosts FILTERED_UPSTREAM_HOSTS] [--enable-web-server] - [--enable-static-server] [--static-server-dir STATIC_SERVER_DIR] + [--filtered-upstream-hosts FILTERED_UPSTREAM_HOSTS] + [--enable-web-server] [--enable-static-server] + [--static-server-dir STATIC_SERVER_DIR] [--min-compression-length MIN_COMPRESSION_LENGTH] [--pac-file PAC_FILE] - [--pac-file-url-path PAC_FILE_URL_PATH] + [--pac-file-url-path PAC_FILE_URL_PATH] [--proxy-pool PROXY_POOL] [--filtered-client-ips FILTERED_CLIENT_IPS] [--filtered-url-regex-config FILTERED_URL_REGEX_CONFIG] [--cloudflare-dns-mode CLOUDFLARE_DNS_MODE] @@ -1967,31 +1968,32 @@ proxy.py v2.4.0 options: -h, --help show this help message and exit - --enable-events Default: False. Enables core to dispatch lifecycle events. - Plugins can be used to subscribe for core events. + --enable-events Default: False. Enables core to dispatch lifecycle + events. Plugins can be used to subscribe for core events. --enable-conn-pool Default: False. (WIP) Enable upstream connection pooling. --threadless Default: True. Enabled by default on Python 3.8+ (mac, - linux). When disabled a new thread is spawned to handle each - client connection. + linux). When disabled a new thread is spawned to handle + each client connection. --threaded Default: False. Disabled by default on Python < 3.8 and - windows. When enabled a new thread is spawned to handle each - client connection. + windows. When enabled a new thread is spawned to handle + each client connection. --num-workers NUM_WORKERS Defaults to number of CPU cores. - --backlog BACKLOG Default: 100. Maximum number of pending connections to proxy - server + --backlog BACKLOG Default: 100. Maximum number of pending connections to + proxy server --hostname HOSTNAME Default: ::1. Server IP address. --port PORT Default: 8899. Server port. --unix-socket-path UNIX_SOCKET_PATH - Default: None. Unix socket path to use. When provided --host - and --port flags are ignored + Default: None. Unix socket path to use. When provided + --host and --port flags are ignored --num-acceptors NUM_ACCEPTORS Defaults to number of CPU cores. --version, -v Prints proxy.py version. --log-level LOG_LEVEL Valid options: DEBUG, INFO (default), WARNING, ERROR, - CRITICAL. Both upper and lowercase values are allowed. You - may also simply use the leading character e.g. --log-level d + CRITICAL. Both upper and lowercase values are allowed. + You may also simply use the leading character e.g. --log- + level d --log-file LOG_FILE Default: sys.stdout. Log file destination. --log-format LOG_FORMAT Log format for Python logger. @@ -1999,12 +2001,12 @@ options: Default: 1024. Maximum number of files (TCP connections) that proxy.py can open concurrently. --plugins PLUGINS [PLUGINS ...] - Comma separated plugins. You may use --plugins flag multiple - times. + Comma separated plugins. You may use --plugins flag + multiple times. --enable-dashboard Default: False. Enables proxy.py dashboard. --work-klass WORK_KLASS - Default: proxy.http.HttpProtocolHandler. Work klass to use - for work execution. + Default: proxy.http.HttpProtocolHandler. Work klass to + use for work execution. --pid-file PID_FILE Default: None. Save "parent" process ID to a file. --client-recvbuf-size CLIENT_RECVBUF_SIZE Default: 1 MB. Maximum amount of data received from the @@ -2014,31 +2016,32 @@ options: encryption with clients. If used, must also pass --cert- file. --timeout TIMEOUT Default: 10.0. Number of seconds after which an inactive - connection must be dropped. Inactivity is defined by no data - sent or received by the client. + connection must be dropped. Inactivity is defined by no + data sent or received by the client. --disable-http-proxy Default: False. Whether to disable proxy.HttpProxyPlugin. --ca-key-file CA_KEY_FILE Default: None. CA key to use for signing dynamically - generated HTTPS certificates. If used, must also pass --ca- - cert-file and --ca-signing-key-file + generated HTTPS certificates. If used, must also pass + --ca-cert-file and --ca-signing-key-file --ca-cert-dir CA_CERT_DIR Default: ~/.proxy.py. Directory to store dynamically - generated certificates. Also see --ca-key-file, --ca-cert- - file and --ca-signing-key-file + generated certificates. Also see --ca-key-file, --ca- + cert-file and --ca-signing-key-file --ca-cert-file CA_CERT_FILE Default: None. Signing certificate to use for signing - dynamically generated HTTPS certificates. If used, must also - pass --ca-key-file and --ca-signing-key-file - --ca-file CA_FILE Default: /Users/abhinavsingh/Dev/proxy.py/venv310/lib/python - 3.10/site-packages/certifi/cacert.pem. Provide path to + dynamically generated HTTPS certificates. If used, must + also pass --ca-key-file and --ca-signing-key-file + --ca-file CA_FILE Default: /Users/abhinavsingh/Dev/proxy.py/venv310/lib/pyt + hon3.10/site-packages/certifi/cacert.pem. Provide path to custom CA bundle for peer certificate verification --ca-signing-key-file CA_SIGNING_KEY_FILE - Default: None. CA signing key to use for dynamic generation - of HTTPS certificates. If used, must also pass --ca-key-file - and --ca-cert-file + Default: None. CA signing key to use for dynamic + generation of HTTPS certificates. If used, must also pass + --ca-key-file and --ca-cert-file --cert-file CERT_FILE - Default: None. Server certificate to enable end-to-end TLS - encryption with clients. If used, must also pass --key-file. + Default: None. Server certificate to enable end-to-end + TLS encryption with clients. If used, must also pass + --key-file. --disable-headers DISABLE_HEADERS Default: None. Comma separated list of headers to remove before dispatching client request to upstream server. @@ -2053,9 +2056,10 @@ options: Default: A temporary directory. Flag only applicable when cache plugin is used with on-disk storage. --filtered-upstream-hosts FILTERED_UPSTREAM_HOSTS - Default: Blocks Facebook. Comma separated list of IPv4 and - IPv6 addresses. - --enable-web-server Default: False. Whether to enable proxy.HttpWebServerPlugin. + Default: Blocks Facebook. Comma separated list of IPv4 + and IPv6 addresses. + --enable-web-server Default: False. Whether to enable + proxy.HttpWebServerPlugin. --enable-static-server Default: False. Enable inbuilt static file server. Optionally, also use --static-server-dir to serve static @@ -2064,16 +2068,18 @@ options: folder. --static-server-dir STATIC_SERVER_DIR Default: "public" folder in directory where proxy.py is - placed. This option is only applicable when static server is - also enabled. See --enable-static-server. + placed. This option is only applicable when static server + is also enabled. See --enable-static-server. --min-compression-length MIN_COMPRESSION_LENGTH Default: 20 bytes. Sets the minimum length of a response that will be compressed (gzipped). --pac-file PAC_FILE A file (Proxy Auto Configuration) or string to serve when - the server receives a direct file request. Using this option - enables proxy.HttpWebServerPlugin. + the server receives a direct file request. Using this + option enables proxy.HttpWebServerPlugin. --pac-file-url-path PAC_FILE_URL_PATH Default: /. Web server path to serve the PAC file. + --proxy-pool PROXY_POOL + List of upstream proxies to use in the pool --filtered-client-ips FILTERED_CLIENT_IPS Default: 127.0.0.1,::1. Comma separated list of IPv4 and IPv6 addresses. @@ -2085,7 +2091,8 @@ options: protection) or "family" (for malware and adult content protection) -Proxy.py not working? Report at: https://github.com/abhinavsingh/proxy.py/issues/new +Proxy.py not working? Report at: +https://github.com/abhinavsingh/proxy.py/issues/new ``` # Changelog diff --git a/proxy/plugin/proxy_pool.py b/proxy/plugin/proxy_pool.py index 6117046f51..7720c46cd7 100644 --- a/proxy/plugin/proxy_pool.py +++ b/proxy/plugin/proxy_pool.py @@ -16,6 +16,7 @@ from ..core.connection.server import TcpServerConnection from ..common.types import Readables, Writables +from ..common.flag import flags from ..http.exception import HttpProtocolException from ..http.proxy import HttpProxyBasePlugin from ..http.parser import HttpParser @@ -34,6 +35,22 @@ '{upstream_proxy_host}:{upstream_proxy_port} - ' + \ '{response_bytes} bytes - {connection_time_ms} ms' +# Run two separate instances of proxy.py +# on port 9000 and 9001 BUT WITHOUT ProxyPool plugin +# to avoid infinite loops. +DEFAULT_PROXY_POOL: List[str] = [ + # 'localhost:9000', + # 'localhost:9001', +] + +flags.add_argument( + '--proxy-pool', + action='append', + nargs=1, + default=DEFAULT_PROXY_POOL, + help='List of upstream proxies to use in the pool', +) + class ProxyPoolPlugin(HttpProxyBasePlugin): """Proxy pool plugin simply acts as a proxy adapter for proxy.py itself. @@ -41,14 +58,6 @@ class ProxyPoolPlugin(HttpProxyBasePlugin): Imagine this plugin as setting up proxy settings for proxy.py instance itself. All incoming client requests are proxied to configured upstream proxies.""" - # Run two separate instances of proxy.py - # on port 9000 and 9001 BUT WITHOUT ProxyPool plugin - # to avoid infinite loops. - UPSTREAM_PROXY_POOL = [ - ('localhost', 9000), - ('localhost', 9001), - ] - def __init__(self, *args: Any, **kwargs: Any) -> None: super().__init__(*args, **kwargs) self.upstream: Optional[TcpServerConnection] = None @@ -99,10 +108,10 @@ def before_upstream_connection( # a fresh upstream proxy connection for each client request. # # Implement your own logic here e.g. round-robin, least connection etc. - endpoint = random.choice(self.UPSTREAM_PROXY_POOL) + endpoint = random.choice(self.flags.proxy_pool)[0].split(':') logger.debug('Using endpoint: {0}:{1}'.format(*endpoint)) self.upstream = TcpServerConnection( - endpoint[0], endpoint[1], + endpoint[0], int(endpoint[1]), ) try: self.upstream.connect()