# Patch summary (commentary placed ahead of the first "diff --git" header).
#
# Adds an "httpbench" benchmark in three parts:
#
# * README.md  - adds the entry "httpbench  Scrapy HTTP download handler test"
#                between the existing "cssbench" and "itemloader" entries.
# * bench.py   - adds the click command `httpbench(obj)`. It builds the command
#                string "<directory of bench.py>/execute.py runspider httpbench.py"
#                followed by one "-s '<value>'" per item in `obj.set`, and passes it
#                to `calculator()` under the label "HTTP Benchmark" together with
#                `obj.n_runs`, `obj.only_result`, `obj.upload_result`, `obj.vmprof`.
# * httpbench.py (new file) - defines `HTTPSpider` (name 'httpbench'):
#     - `start_requests()` resets `response_count`, records `start_time`, and
#       yields 14 x 11 = 154 requests to
#       https://http{version}.golang.org/gophertiles?x={x}&y={y}&latency=0
#       where `version` is '2' if '2' is `in` the 'https' value of the
#       DOWNLOAD_HANDLERS setting, otherwise '1'.
#     - `parse()` only increments `response_count`.
#     - `close()` writes `response_count / elapsed seconds` (elapsed measured
#       from `start_time`) as text to "Benchmark.txt", overwriting the file.
#
# NOTE(review): the line breaks and indentation of this patch appear to have
# been collapsed; it presumably has to be regenerated from the original commit
# before it can be applied - confirm.
# NOTE(review): `close(self, reason)` uses the same name as Scrapy's built-in
# `Spider.close`; presumably `closed(reason)` is the documented hook - confirm
# against the Scrapy version in use.
diff --git a/README.md b/README.md index f951433..ad92785 100644 --- a/README.md +++ b/README.md @@ -116,6 +116,7 @@ The spider `download.py`, dumps the response body as unicode to the files. The l bookworm Spider to scrape locally hosted site broadworm Broad crawl spider to scrape locally hosted sites cssbench Micro-benchmark for extraction using css + httpbench Scrapy HTTP download handler test itemloader Item loader benchmarker linkextractor Micro-benchmark for LinkExtractor() urlparseprofile Urlparse benchmarker diff --git a/bench.py b/bench.py index 0fe725c..5802f3b 100644 --- a/bench.py +++ b/bench.py @@ -168,6 +168,24 @@ def cssbench(obj): obj.vmprof) +@cli.command() +@click.pass_obj +def httpbench(obj): + """Scrapy HTTP download handler test""" + scrapy_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'execute.py') + settings = " ".join("-s '%s'" % s for s in obj.set) + arg = "%s runspider httpbench.py %s" % (scrapy_path, settings) + + calculator( + "HTTP Benchmark", + arg, + obj.n_runs, + obj.only_result, + obj.upload_result, + obj.vmprof + ) + + @cli.command() @click.pass_obj def xpathbench(obj): diff --git a/httpbench.py b/httpbench.py new file mode 100644 index 0000000..f557618 --- /dev/null +++ b/httpbench.py @@ -0,0 +1,38 @@ +from datetime import datetime + +from scrapy import Request, Spider +from six import text_type + + +class HTTPSpider(Spider): + """Spider equivalent to https://http1.golang.org/gophertiles + + Use the DOWNLOAD_HANDLERS setting to set the download handler to test.
+ """ + name = 'httpbench' + + def start_requests(self): + self.response_count = 0 + self.start_time = datetime.utcnow() + version = ( + '2' if '2' in self.settings.getwithbase('DOWNLOAD_HANDLERS')['https'] + else '1' + ) + for x in range(14): + for y in range(11): + yield Request( + 'https://http{version}.golang.org/gophertiles?x={x}&y={y}&latency=0' + .format( + version=version, + x=x, + y=y, + ) + ) + + def parse(self, response): + self.response_count += 1 + + def close(self, reason): + run_time = datetime.utcnow() - self.start_time + with open("Benchmark.txt", 'w') as f: + f.write(text_type(self.response_count / run_time.total_seconds()))