Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Cloudflare IP range importer #572

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .env.gitpod
Original file line number Diff line number Diff line change
Expand Up @@ -40,5 +40,9 @@ EQUINIX_REMOTE_API_ENDPOINT = "https://tgwf-web-app-live.s3.nl-ams.scw.cloud/dat
AMAZON_PROVIDER_ID = 696
AMAZON_REMOTE_API_ENDPOINT = "https://ip-ranges.amazonaws.com/ip-ranges.json"

CLOUDFLARE_PROVIDER_ID = 779
CLOUDFLARE_REMOTE_API_ENDPOINT_IPV4 = "https://www.cloudflare.com/ips-v4/#"
CLOUDFLARE_REMOTE_API_ENDPOINT_IPV6 = "https://www.cloudflare.com/ips-v6/#"

# Uncomment this to set an explicit API URL for the api-docs page
# API_URL = "https://domain.starting-with-https.com"
3 changes: 3 additions & 0 deletions ansible/templates/import_ips_for_large_providers.sh.j2
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,6 @@ source .venv/bin/activate
dotenv run -- ./manage.py update_networks_in_db_amazon
dotenv run -- ./manage.py update_networks_in_db_google
dotenv run -- ./manage.py update_networks_in_db_microsoft

# TODO: activate once we have found our technical contact to talk to
# dotenv run -- ./manage.py update_networks_in_db_cloudflare
Copy link
Member

@mrchrisadams mrchrisadams Jul 1, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the command we would run on the weekly import, as we already do for AWS, GCP, and Microsoft.

15 changes: 15 additions & 0 deletions apps/greencheck/fixtures/test_dataset_cloudflare.ipv4.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
173.245.48.0/20
103.21.244.0/22
103.22.200.0/22
103.31.4.0/22
141.101.64.0/18
108.162.192.0/18
190.93.240.0/20
188.114.96.0/20
197.234.240.0/22
198.41.128.0/17
162.158.0.0/15
104.16.0.0/13
104.24.0.0/14
172.64.0.0/13
131.0.72.0/22
7 changes: 7 additions & 0 deletions apps/greencheck/fixtures/test_dataset_cloudflare.ipv6.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
2400:cb00::/32
2606:4700::/32
2803:f800::/32
2405:b500::/32
2405:8100::/32
2a06:98c0::/29
2c0f:f248::/32
1 change: 1 addition & 0 deletions apps/greencheck/importers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@
from .importer_amazon import AmazonImporter # noqa
from .importer_equinix import EquinixImporter # noqa
from .importer_microsoft import MicrosoftImporter # noqa
from .importer_cloudflare import CloudflareImporter # noqa
from .network_importer import NetworkImporter # noqa
63 changes: 63 additions & 0 deletions apps/greencheck/importers/importer_cloudflare.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import logging
from typing import List, Tuple, Union

import requests
from django.conf import settings

from apps.accounts.models.hosting import Hostingprovider
from apps.greencheck.importers.importer_interface import ImporterProtocol
from apps.greencheck.importers.network_importer import NetworkImporter

logger = logging.getLogger(__name__)


class CloudflareImporter:
    """
    Importer for the IP ranges Cloudflare publishes as plain text files.

    The IPv4 and IPv6 lists are fetched from the endpoints named by
    ``settings.CLOUDFLARE_REMOTE_API_ENDPOINT_IPV4`` and
    ``settings.CLOUDFLARE_REMOTE_API_ENDPOINT_IPV6``, filtered down to network
    entries, and handed to a ``NetworkImporter`` for the hosting provider
    identified by ``settings.CLOUDFLARE_PROVIDER_ID``.
    """

    # Seconds to wait on each endpoint. Without a timeout, requests can block
    # indefinitely, which would hang the scheduled import.
    REQUEST_TIMEOUT = 30

    def __init__(self):
        self.hosting_provider_id = settings.CLOUDFLARE_PROVIDER_ID

    def process(self, list_of_addresses: list[str]):
        """
        Replace the provider's active networks with ``list_of_addresses``.

        The provider's existing IP ranges and ASNs are deactivated first, then
        the given addresses are passed to
        ``NetworkImporter.process_addresses``, whose result is returned.

        NOTE(review): because deactivation happens before the import, callers
        should avoid passing an empty list after a failed fetch.
        """
        provider = Hostingprovider.objects.get(id=settings.CLOUDFLARE_PROVIDER_ID)

        network_importer = NetworkImporter(provider)
        network_importer.deactivate_ips()
        network_importer.deactivate_asns()
        return network_importer.process_addresses(list_of_addresses)

    def fetch_data_from_source(self) -> list:
        """
        Fetch the contents of the two cloudflare endpoints, and return a list of
        IP networks ready to be processed.

        Returns an empty list when either request fails (connection error,
        timeout, or non-2xx HTTP status), after logging a warning.
        """
        endpoints = (
            settings.CLOUDFLARE_REMOTE_API_ENDPOINT_IPV4,
            settings.CLOUDFLARE_REMOTE_API_ENDPOINT_IPV6,
        )

        # Review note: the lines of each text file are pulled out so that,
        # rather than returning two newline-delimited strings
        # (ipv4_data, ipv6_data), we return a single list of strings, with each
        # string representing one IPv4 or IPv6 network.
        networks = []
        try:
            for endpoint in endpoints:
                response = requests.get(endpoint, timeout=self.REQUEST_TIMEOUT)
                # Treat HTTP error responses as failures instead of parsing
                # an error page as if it were a list of networks.
                response.raise_for_status()
                networks.extend(response.text.splitlines())
        except requests.RequestException as err:
            logger.warning("Unable to fetch text files. Aborting early.")
            logger.warning(err)
            return []

        return networks

    def parse_to_list(self, raw_data: list[str]) -> List[Union[str, Tuple]]:
        """
        Accept a list of IP networks listed in the remote text file and
        return a list of IP networks.

        Blank lines and surrounding whitespace are dropped. An entry is kept
        when it starts with "AS", starts with a digit, or parses as an IP
        network (which also covers IPv6 networks beginning with a hex letter).
        ``None`` or an empty input yields an empty list.
        """
        if not raw_data:
            return []

        list_of_ips = []
        for line in raw_data:
            if not isinstance(line, str):
                logger.warning("Skipping non-text entry: %r", line)
                continue

            # Filter out empty lines
            entry = line.strip()
            if not entry:
                continue

            # only return AS networks and IP networks
            if (
                entry.startswith("AS")
                or entry[0].isdigit()
                or self._is_ip_network(entry)
            ):
                list_of_ips.append(entry)
        return list_of_ips

    @staticmethod
    def _is_ip_network(value: str) -> bool:
        """Return True when ``value`` parses as an IPv4 or IPv6 network."""
        import ipaddress

        try:
            ipaddress.ip_network(value, strict=False)
        except ValueError:
            return False
        return True


# Import-time check: instantiate the importer and verify that the instance
# is recognised as an ImporterProtocol.
assert isinstance(CloudflareImporter(), ImporterProtocol)
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from apps.greencheck.importers.importer_cloudflare import CloudflareImporter
from django.core.management.base import BaseCommand


class Command(BaseCommand):
    """
    Management command that is executed when we call
    ``update_networks_in_db_cloudflare``.
    """

    def handle(self, *args, **options):
        """
        Fetch the data from the Cloudflare endpoints, parse it, and add the
        IP ranges to our Cloudflare provider.

        If nothing usable was fetched, the import is skipped so that the
        provider's existing networks are not deactivated for no reason.
        """
        importer = CloudflareImporter()
        data = importer.fetch_data_from_source()
        parsed_data = importer.parse_to_list(data)

        # importer.process() deactivates the existing IPs and ASNs before
        # importing, so bail out when there is nothing to import.
        if not parsed_data:
            self.stderr.write(
                "No Cloudflare IP ranges could be retrieved. Skipping import."
            )
            return

        result = importer.process(parsed_data)

        update_message = (
            f"Processing complete. Created {len(result['created_asns'])} ASNs, "
            f"and {len(result['created_green_ips'])} IP ranges. "
            f"Updated {len(result['green_asns'])} ASNs, "
            f"and {len(result['green_ips'])} IP ranges. (either IPv4 and/or IPv6)"
        )

        self.stdout.write(update_message)
155 changes: 155 additions & 0 deletions apps/greencheck/tests/test_importer_cloudflare.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
import pytest
import pathlib
import json

from django.core.management import call_command
from apps.greencheck.importers import CloudflareImporter
from ..importers.network_importer import is_ip_network



@pytest.fixture
def sample_data_ipv4() -> str:
    """Return the full text of the bundled Cloudflare IPv4 sample file."""
    fixtures_dir = pathlib.Path(__file__).parent.parent / "fixtures"
    return (fixtures_dir / "test_dataset_cloudflare.ipv4.txt").read_text()

@pytest.fixture
def sample_data_ipv6() -> str:
    """Return the full text of the bundled Cloudflare IPv6 sample file."""
    tests_dir = pathlib.Path(__file__).parent
    sample_path = tests_dir.parent.joinpath(
        "fixtures", "test_dataset_cloudflare.ipv6.txt"
    )
    return sample_path.read_text()

@pytest.fixture
def sample_data_raw(sample_data_ipv6, sample_data_ipv4) -> list[str]:
    """
    Return the same values as returned by the fetch_data_from_source method:
    one flat list of lines, here the IPv6 sample followed by the IPv4 sample.
    """
    # Build a real list so the value matches the declared return type.
    return [*sample_data_ipv6.splitlines(), *sample_data_ipv4.splitlines()]



@pytest.fixture()
def settings_with_cloudflare_provider(settings):
    """Set CLOUDFLARE_PROVIDER_ID to a fixed test id and return the settings."""
    cloudflare_test_id = 123
    settings.CLOUDFLARE_PROVIDER_ID = cloudflare_test_id
    return settings

class TestCloudflareImporter:
    """Tests for parsing the Cloudflare sample data and importing it."""

    def test_parse_to_list(
        self,
        settings_with_cloudflare_provider,
        hosting_provider_factory,
        sample_data_raw,
    ):
        """
        Test the parsing function converts the fetched text lines into a
        consistent list our importer can process
        """

        # Given: an initialised importer
        importer = CloudflareImporter()

        # When: I parse the published info
        list_of_addresses = importer.parse_to_list(sample_data_raw)

        # Then: every sample line should be kept, so the per-entry check
        # below cannot pass vacuously on an empty result
        assert len(list_of_addresses) == len(sample_data_raw)

        # And: I should see a list of IP and IPv6 addresses
        for network in list_of_addresses:
            assert is_ip_network(network)

    @pytest.mark.django_db
    def test_process_ip_import(
        self,
        settings_with_cloudflare_provider,
        hosting_provider_factory,
        sample_data_raw,
    ):
        """
        Test that we can import the parsed and reshaped list of IP addresses.
        """

        # Given: a provider standing in for our Cloudflare
        fake_cf = hosting_provider_factory.create(
            id=settings_with_cloudflare_provider.CLOUDFLARE_PROVIDER_ID
        )
        # And: an initialised importer
        importer = CloudflareImporter()

        # When: parse the published info, and process the import
        list_of_addresses = importer.parse_to_list(sample_data_raw)
        import_result = importer.process(list_of_addresses)

        # Then: the provider holds exactly the ranges reported as created
        assert fake_cf.greencheckip_set.all().count() == len(
            import_result["created_green_ips"]
        )

    @pytest.mark.django_db
    def test_process_repeat_ip_import(
        self,
        settings_with_cloudflare_provider,
        hosting_provider_factory,
        sample_data_raw,
    ):
        """
        Test that a second import does not duplicate ip addresses.
        """

        # Given: a provider standing in for our Cloudflare
        fake_cf = hosting_provider_factory.create(
            id=settings_with_cloudflare_provider.CLOUDFLARE_PROVIDER_ID
        )
        # And: an initialised importer
        importer = CloudflareImporter()

        # When: parse the published info, and process the import
        list_of_addresses = importer.parse_to_list(sample_data_raw)

        import_result = importer.process(list_of_addresses)

        # And: we run the very same import a second time
        repeat_import_result = importer.process(list_of_addresses)

        # Then: the number of stored ranges still matches the first import
        assert fake_cf.greencheckip_set.all().count() == len(
            import_result["created_green_ips"]
        )

        # And: the repeat import created nothing new
        assert len(repeat_import_result["created_green_ips"]) == 0

        # And: the updated ranges, once de-duplicated, match what is stored
        deduped_green_ips = set(repeat_import_result["green_ips"])
        assert fake_cf.greencheckip_set.all().count() == len(deduped_green_ips)


@pytest.mark.django_db
class TestCloudflareImportCommand:
    """
    Test the management command to update the Cloudflare IP ranges
    """

    def test_handle(
        self,
        mocker,
        hosting_provider_factory,
        settings_with_cloudflare_provider,
        sample_data_raw,
    ):
        """
        Run the command against the local sample and check that one IP range
        is stored per sample line.
        """
        # mock the call to retrieve from source, to a locally stored
        # testing sample. By instead using the test sample,
        # we avoid unnecessary network requests.

        # identify method we want to mock
        path_to_mock = (
            "apps.greencheck.importers.importer_cloudflare."
            "CloudflareImporter.fetch_data_from_source"
        )
        # Given: a provider standing in for our Cloudflare
        fake_cf = hosting_provider_factory.create(
            id=settings_with_cloudflare_provider.CLOUDFLARE_PROVIDER_ID
        )

        # define a different return when the targeted mock
        # method is called
        mocker.patch(
            path_to_mock,
            return_value=sample_data_raw,
        )

        # When: I run the management command to update the cloudflare ip ranges
        call_command("update_networks_in_db_cloudflare")

        # Then: I should see the ip ranges in the database. The expected count
        # is derived from the sample (15 IPv4 + 7 IPv6 = 22 networks) so it
        # stays correct if the fixture files are refreshed.
        expected_ip_range_count = len(sample_data_raw)
        assert fake_cf.greencheckip_set.all().count() == expected_ip_range_count
3 changes: 3 additions & 0 deletions greenweb/settings/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,9 @@
"GOOGLE_DATASET_ENDPOINT", default="https://www.gstatic.com/ipranges/cloud.json"
)

CLOUDFLARE_PROVIDER_ID = env("CLOUDFLARE_PROVIDER_ID", default=None)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need to set some default values here for the app to start up cleanly

CLOUDFLARE_REMOTE_API_ENDPOINT_IPV4 = env("CLOUDFLARE_REMOTE_API_ENDPOINT_IPV4", default="https://www.cloudflare.com/ips-v4/")
CLOUDFLARE_REMOTE_API_ENDPOINT_IPV6 = env("CLOUDFLARE_REMOTE_API_ENDPOINT_IPV6", default="https://www.cloudflare.com/ips-v6/")

RABBITMQ_URL = env("RABBITMQ_URL", default=None)

Expand Down