diff --git a/censys/asm/inventory.py b/censys/asm/inventory.py
index e123242..15ddb4d 100644
--- a/censys/asm/inventory.py
+++ b/censys/asm/inventory.py
@@ -18,6 +18,7 @@ def search(
         cursor: Optional[str] = None,
         sort: Optional[List[str]] = None,
         fields: Optional[List[str]] = None,
+        pages: Optional[int] = None,
     ) -> dict:
         """Search inventory data.
 
@@ -28,6 +29,7 @@ def search(
             cursor (str, optional): Cursor to start search from.
             sort (List[str], optional): List of fields to sort by.
             fields (List[str], optional): List of fields to return.
+            pages (int, optional): Number of pages of results to return. Set to -1 to return all available pages.
 
         Returns:
             dict: Inventory search results.
@@ -43,6 +45,9 @@ def search(
         if page_size is None:
             page_size = 50
 
+        if pages is None:
+            pages = 1
+
         args = {
             "workspaces": workspaces,
             "pageSize": page_size,
@@ -57,7 +62,27 @@ def search(
         if fields:
             args["fields"] = fields
 
-        return self._get(self.base_path, args=args)
+        page = 1
+        next_cursor = None
+        hits = []
+        resp = self._get(self.base_path, args=args)
+        next_cursor = resp.get("nextCursor")
+        hits.extend(resp.get("hits", []))
+        # Fetch additional pages while a nextCursor is available AND more pages were requested.
+        # The loop ends when next_cursor is None or the requested number of pages is reached.
+        # A non-2xx response raises from self._get, which also ends the loop.
+        while next_cursor and (pages == -1 or page < pages):
+            args["cursor"] = next_cursor
+            resp = self._get(self.base_path, args=args)
+            if "nextCursor" in resp:
+                next_cursor = resp.get("nextCursor")
+            else:
+                next_cursor = None
+            hits.extend(resp.get("hits", []))
+            page += 1
+
+        resp["hits"] = hits
+        return resp
 
     def aggregate(
         self,
diff --git a/censys/cli/commands/asm.py b/censys/cli/commands/asm.py
index 5e019f1..8acd613 100644
--- a/censys/cli/commands/asm.py
+++ b/censys/cli/commands/asm.py
@@ -557,7 +557,9 @@ def cli_execute_saved_query_by_name(args: argparse.Namespace):
     query = results[0]["query"]
 
     try:
-        res = s.search(None, query, args.page_size, None, args.sort, args.fields)
+        res = s.search(
+            None, query, args.page_size, None, args.sort, args.fields, args.pages
+        )
         console.print_json(json.dumps(res))
     except CensysAsmException:
         console.print("Failed to execute saved query.")
@@ -579,7 +581,9 @@ def cli_execute_saved_query_by_id(args: argparse.Namespace):
         console.print("No saved query found with that ID.")
         sys.exit(1)
     try:
-        res = s.search(None, query, args.page_size, None, args.sort, args.fields)
+        res = s.search(
+            None, query, args.page_size, None, args.sort, args.fields, args.pages
+        )
         console.print_json(json.dumps(res))
     except CensysAsmException:
         console.print("Failed to execute saved query.")
@@ -602,6 +606,7 @@ def cli_search(args: argparse.Namespace):
             args.cursor,
             args.sort,
             args.fields,
+            args.pages,
         )
         console.print_json(json.dumps(res))
     except CensysAsmException:
@@ -885,6 +890,12 @@ def add_verbose(parser):
         type=List[str],
         default=[],
     )
+    execute_saved_query_by_name_parser.add_argument(
+        "--pages",
+        help="Number of pages to return. Defaults to 1.",
+        type=int,
+        default=1,
+    )
     add_verbose(execute_saved_query_by_name_parser)
     execute_saved_query_by_name_parser.set_defaults(
         func=cli_execute_saved_query_by_name
@@ -920,6 +931,12 @@ def add_verbose(parser):
         type=List[str],
         default=[],
     )
+    execute_saved_query_by_id_parser.add_argument(
+        "--pages",
+        help="Number of pages to return. Defaults to 1.",
+        type=int,
+        default=1,
+    )
     add_verbose(execute_saved_query_by_id_parser)
     execute_saved_query_by_id_parser.set_defaults(func=cli_execute_saved_query_by_id)
 
@@ -965,5 +982,11 @@ def add_verbose(parser):
         type=str,
         required=False,
     )
+    search_parser.add_argument(
+        "--pages",
+        help="Number of pages to return. Defaults to 1.",
+        type=int,
+        default=1,
+    )
     add_verbose(search_parser)
     search_parser.set_defaults(func=cli_search)
diff --git a/pyproject.toml b/pyproject.toml
index 427606e..59747b4d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "censys"
-version = "2.2.13"
+version = "2.2.14"
 description = "An easy-to-use and lightweight API wrapper for Censys APIs (censys.io)."
 authors = ["Censys, Inc. <support@censys.io>"]
 license = "Apache-2.0"
diff --git a/tests/asm/test_inventory.py b/tests/asm/test_inventory.py
index be25fb3..ec4c107 100644
--- a/tests/asm/test_inventory.py
+++ b/tests/asm/test_inventory.py
@@ -12,7 +12,7 @@
 TEST_INVENTORY_SEARCH_JSON = {
     "totalHits": 0,
-    "nextCursor": "string",
+    "nextCursor": "",
     "previousCursor": "string",
     "queryDurationMillis": 0,
     "hits": [{}],
 }
diff --git a/tests/cli/test_asm.py b/tests/cli/test_asm.py
index 95701e4..66a7d28 100644
--- a/tests/cli/test_asm.py
+++ b/tests/cli/test_asm.py
@@ -163,6 +163,81 @@
     ],
 }
 
+SEARCH_JSON_PAGE_1 = {
+    "totalHits": 5,
+    "nextCursor": "eyJmaWx0ZXIiOnt9LCJzdGFydCI6MjA3MTJ9",
+    "queryDurationMillis": 50,
+    "hits": [
+        {"_details": {}, "domain": {"name": "foo.com"}, "type": "DOMAIN"},
+        {
+            "_details": {},
+            "domain": {"name": "bar.foo.com"},
+            "type": "DOMAIN",
+        },
+    ],
+}
+
+SEARCH_JSON_PAGE_2 = {
+    "totalHits": 5,
+    "nextCursor": "eyJmaWx0ZXIiOnt9LCJzdGFydCI6MjA3MTJ8",
+    "previousCursor": "eyJmaWx0ZXIiOnt9LCJzdGFydCI6MjA3MTJ9",
+    "queryDurationMillis": 50,
+    "hits": [
+        {
+            "_details": {},
+            "domain": {"name": "b.foo.com"},
+            "type": "DOMAIN",
+        },
+        {
+            "_details": {},
+            "domain": {"name": "a.foo.com"},
+            "type": "DOMAIN",
+        },
+    ],
+}
+
+SEARCH_JSON_PAGE_3 = {
+    "totalHits": 5,
+    "previousCursor": "eyJmaWx0ZXIiOnt9LCJzdGFydCI6MjA3MTJ8",
+    "queryDurationMillis": 50,
+    "hits": [
+        {
+            "_details": {},
+            "domain": {"name": "r.foo.com"},
+            "type": "DOMAIN",
+        },
+    ],
+}
+
+SEARCH_JSON_PAGINATED = {
+    "totalHits": 5,
+    "previousCursor": "eyJmaWx0ZXIiOnt9LCJzdGFydCI6MjA3MTJ8",
+    "queryDurationMillis": 50,
+    "hits": [
+        {"_details": {}, "domain": {"name": "foo.com"}, "type": "DOMAIN"},
+        {
+            "_details": {},
+            "domain": {"name": "bar.foo.com"},
+            "type": "DOMAIN",
+        },
+        {
+            "_details": {},
+            "domain": {"name": "b.foo.com"},
+            "type": "DOMAIN",
+        },
+        {
+            "_details": {},
+            "domain": {"name": "a.foo.com"},
+            "type": "DOMAIN",
+        },
+        {
+            "_details": {},
+            "domain": {"name": "r.foo.com"},
+            "type": "DOMAIN",
+        },
+    ],
+}
+
 SAVED_QUERY_ID = "12345"
 
 SAVED_QUERY_NAME = "Test query"
@@ -2191,3 +2266,119 @@ def test_search_failed(self):
 
         # Assertions
         assert "Failed to execute query." in temp_stdout.getvalue()
+
+    def test_search_paginated_all(self):
+        # Mock
+        mock_request = self.mocker.patch("censys.asm.api.CensysAsmAPI.get_workspace_id")
+        mock_request.return_value = WORKSPACE_ID
+        self.patch_args(
+            [
+                "censys",
+                "asm",
+                "search",
+                "--query",
+                "domain: foo.com",
+                "--page-size",
+                "2",
+                "--pages",
+                "-1",
+            ],
+            asm_auth=True,
+        )
+        self.responses.add(
+            responses.GET,
+            INVENTORY_URL + "/v1",
+            status=200,
+            json=SEARCH_JSON_PAGE_1,
+            match=[
+                matchers.query_param_matcher(
+                    {
+                        "workspaces": WORKSPACE_ID,
+                        "query": "domain: foo.com",
+                        "pageSize": 2,
+                    }
+                )
+            ],
+        )
+        self.responses.add(
+            responses.GET,
+            INVENTORY_URL + "/v1",
+            status=200,
+            json=SEARCH_JSON_PAGE_2,
+            match=[
+                matchers.query_param_matcher(
+                    {
+                        "workspaces": WORKSPACE_ID,
+                        "cursor": SEARCH_JSON_PAGE_1["nextCursor"],
+                        "query": "domain: foo.com",
+                        "pageSize": 2,
+                    }
+                )
+            ],
+        )
+        self.responses.add(
+            responses.GET,
+            INVENTORY_URL + "/v1",
+            status=200,
+            json=SEARCH_JSON_PAGE_3,
+            match=[
+                matchers.query_param_matcher(
+                    {
+                        "workspaces": WORKSPACE_ID,
+                        "cursor": SEARCH_JSON_PAGE_2["nextCursor"],
+                        "query": "domain: foo.com",
+                        "pageSize": 2,
+                    }
+                )
+            ],
+        )
+
+        # Actual call
+        temp_stdout = StringIO()
+        with contextlib.redirect_stdout(temp_stdout):
+            cli_main()
+
+        # Assertions
+        actual_json = json.loads(temp_stdout.getvalue())
+        assert actual_json == SEARCH_JSON_PAGINATED
+
+    def test_search_paginated_partial(self):
+        # Mock
+        mock_request = self.mocker.patch("censys.asm.api.CensysAsmAPI.get_workspace_id")
+        mock_request.return_value = WORKSPACE_ID
+        self.patch_args(
+            [
+                "censys",
+                "asm",
+                "search",
+                "--query",
+                "domain: foo.com",
+                "--page-size",
+                "2",
+            ],
+            asm_auth=True,
+        )
+        self.responses.add(
+            responses.GET,
+            INVENTORY_URL + "/v1",
+            status=200,
+            json=SEARCH_JSON_PAGE_1,
+            match=[
+                matchers.query_param_matcher(
+                    {
+                        "workspaces": WORKSPACE_ID,
+                        "query": "domain: foo.com",
+                        "pageSize": 2,
+                    }
+                )
+            ],
+        )
+
+        # Actual call
+        temp_stdout = StringIO()
+        with contextlib.redirect_stdout(temp_stdout):
+            cli_main()
+
+        # Assertions
+        actual_json = json.loads(temp_stdout.getvalue())
+        assert actual_json == SEARCH_JSON_PAGE_1
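
Usage sketch (not part of the patch): how the new pages argument behaves from the Python API. This assumes InventorySearch is importable from censys.asm and accepts an api_key keyword, as elsewhere in this repo; the key value and query below are placeholders. Passing None for workspaces mirrors the CLI calls above, which resolve the default workspace from the credential.

    from censys.asm import InventorySearch

    s = InventorySearch(api_key="<your-asm-api-key>")  # placeholder credential

    # Default behavior is unchanged: one page of (up to) 50 results.
    one_page = s.search(None, "domain: foo.com", 50)

    # pages=3 follows nextCursor up to twice more; hits from every page are
    # merged into the returned response's "hits" list. The rest of the
    # returned dict (e.g. previousCursor) comes from the last page fetched,
    # matching SEARCH_JSON_PAGINATED in the tests above.
    three_pages = s.search(None, "domain: foo.com", 50, pages=3)

    # pages=-1 keeps following nextCursor until the API stops returning one.
    everything = s.search(None, "domain: foo.com", 50, pages=-1)
    print(len(everything["hits"]))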