-
Notifications
You must be signed in to change notification settings - Fork 0
/
GeocodeScript.py
134 lines (115 loc) · 5.57 KB
/
GeocodeScript.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# -*- coding: utf-8 -*-
"""
Use Google Maps API to geocode location data
Outputs to a CSV file containing all lats/longs
Reference code from:
https://gist.github.com/shanealynn/033c8a3cacdba8ce03cbe116225ced31
"""
import logging
import os
import time

import pandas as pd
import requests
# Send every log record (DEBUG and above) to the console.
logger = logging.getLogger("root")
logger.setLevel(logging.DEBUG)
ch = logging.StreamHandler()
ch.setLevel(logging.DEBUG)
logger.addHandler(ch)

# Load the ticket data and build one query string per distinct location,
# each suffixed with ' Charlottesville, VA'.
df = pd.read_csv('Parking_Tickets.csv')
unique = df.Location.unique()
fixed = [str(x) + ' Charlottesville, VA' for x in unique]

# The API key is read from the GOOGLE_API_KEY environment variable so that
# the name is always defined (the script references API_KEY below) and the
# secret never has to be written into this file. When the variable is unset
# the value is None and requests are sent without a key.
API_KEY = os.environ.get('GOOGLE_API_KEY')
# Seconds to wait before retrying after an OVER_QUERY_LIMIT response.
BACKOFF_TIME = 30
# When True, the raw API response is stored alongside each parsed result.
RETURN_FULL_RESULTS = False
def get_google_results(address, api_key=None, return_full_response=False):
    """
    Get geocode results from the Google Maps Geocoding API.

    Note that when Google returns multiple geocode results, this function
    returns details of the FIRST result only.

    @param address: String address, as accurate as possible.
        For example "18 Grafton Street, Dublin, Ireland".
    @param api_key: String API key from Google, or None. When None, the
        request is sent without a ``key`` parameter.
    @param return_full_response: Boolean; when True the decoded JSON
        response is included in the output under the ``response`` key.
        Useful if you'd like additional location details for storage or
        parsing later.
    @return: dict with the keys ``formatted_address``, ``latitude``,
        ``longitude``, ``accuracy``, ``google_place_id``, ``type``,
        ``postcode``, ``input_string``, ``number_of_results`` and
        ``status`` (plus ``response`` when requested). The location fields
        are None when the API returned no results.
    """
    # Hand the query to requests via ``params`` so it is URL-encoded.
    # Formatting the raw address into the URL breaks on characters such as
    # '&' or '#', which would cut the address short or corrupt the query.
    query = {"address": address}
    if api_key is not None:
        query["key"] = api_key

    # Ping Google for the results and decode the JSON body into a dict.
    response = requests.get(
        "https://maps.googleapis.com/maps/api/geocode/json", params=query
    )
    results = response.json()

    # Treat a missing or empty result list the same way: nothing was found.
    candidates = results.get('results') or []

    if not candidates:
        output = {
            "formatted_address": None,
            "latitude": None,
            "longitude": None,
            "accuracy": None,
            "google_place_id": None,
            "type": None,
            "postcode": None,
        }
    else:
        answer = candidates[0]
        # Fall back to empty containers so a partially populated result
        # yields None fields instead of raising.
        geometry = answer.get('geometry') or {}
        location = geometry.get('location') or {}
        components = answer.get('address_components') or []
        output = {
            "formatted_address": answer.get('formatted_address'),
            "latitude": location.get('lat'),
            "longitude": location.get('lng'),
            "accuracy": geometry.get('location_type'),
            "google_place_id": answer.get("place_id"),
            "type": ",".join(answer.get('types') or []),
            "postcode": ",".join(
                [x['long_name'] for x in components
                 if 'postal_code' in (x.get('types') or [])]
            ),
        }

    # Append some other details about the request itself.
    output['input_string'] = address
    output['number_of_results'] = len(candidates)
    output['status'] = results.get('status')
    if return_full_response is True:
        output['response'] = results

    return output
#------------------ PROCESSING LOOP -----------------------------

# Ensure, before we start, that the API key is ok/valid, and internet access is ok
test_result = get_google_results("London, England", API_KEY, RETURN_FULL_RESULTS)
if (test_result['status'] != 'OK') or (test_result['formatted_address'] != 'London, UK'):
    logger.warning("There was an error when testing the Google Geocoder.")
    raise ConnectionError('Problem with test results from Google Geocode - check your API key and internet connection.')

# Create a list to hold results
results = []
# Go through each address in turn
for address in fixed:
    # Retry the same address until we get an answer that is not a
    # rate-limit response, or until the request itself fails.
    geocode_result = None
    while True:
        try:
            geocode_result = get_google_results(address, API_KEY, return_full_response=RETURN_FULL_RESULTS)
        except Exception as e:
            logger.exception(e)
            logger.error("Major error with {}".format(address))
            logger.error("Skipping!")
            # Drop this address entirely. Without this, the code below
            # would read a result that was never assigned (or one left
            # over from the previous address).
            geocode_result = None
            break

        if geocode_result['status'] != 'OVER_QUERY_LIMIT':
            break

        # If we're over the API limit, back off for a while and try again.
        logger.info("Hit Query Limit! Backing off for a bit.")
        time.sleep(BACKOFF_TIME)  # sleep for 30 seconds

    if geocode_result is None:
        # The request failed; nothing to record for this address.
        continue

    # If we're ok with API use, save the results.
    # Note that the results might be empty / non-ok - log this.
    if geocode_result['status'] != 'OK':
        logger.warning("Error geocoding {}: {}".format(address, geocode_result['status']))
    logger.debug("Geocoded: {}: {}".format(address, geocode_result['status']))
    results.append(geocode_result)

    # Progress is reported only after a result was appended, so a skipped
    # address never repeats a message or rewrites an unchanged backup.
    # Print status every 100 addresses
    if len(results) % 100 == 0:
        logger.info("Completed {} of {} address".format(len(results), len(fixed)))

    # Every 500 addresses, save progress to file (in case of a failure so you have something!)
    if len(results) % 500 == 0:
        pd.DataFrame(results).to_csv("{}_bak".format('hopethisworks.csv'))

# All done
logger.info("Finished geocoding all addresses")
# Write the full results to csv using the pandas library.
pd.DataFrame(results).to_csv('outputgeocoder.csv', encoding='utf8')