-
Notifications
You must be signed in to change notification settings - Fork 3
/
LoadData.py
128 lines (120 loc) · 4.51 KB
/
LoadData.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
# -*- coding: utf-8 -*-
"""
Created on Sun May 31 00:03:51 2020
@author: Berma
"""
#%%
import ee
ee.Initialize()
import pandas as pd
import numpy as np
import datetime
from sklearn.cluster import KMeans
#%%
def inttostr(s,n=5):
s = str(s)
add = n-len(s)
s = '0'*add+s
return(s)
def loadlith(fips):
geo = ee.FeatureCollection("TIGER/2018/Counties")
geo = geo.filter(ee.Filter.eq('STATEFP',fips[:2]))
geo = geo.filter(ee.Filter.eq('COUNTYFP',fips[2:]))
data= ee.Image('CSP/ERGo/1_0/US/lithology').select('b1')
data = data.reduceRegion(
reducer=ee.Reducer.autoHistogram(),
geometry=geo,
scale = 1000)
data = pd.DataFrame(data.getInfo()['b1'])
data =data[data[0]!=0]
data[1]=data[1]/data[1].sum()
data.columns = ['cat','percent']
data = data.set_index('cat')
return(data)
def loadclimate(fips,year,month):
date = str(year)+'-'+str(month)
geo = ee.FeatureCollection("TIGER/2018/Counties")
geo = geo.filter(ee.Filter.inList('STATEFP',list(map(lambda x:x[:2],fips))))
geo = geo.filter(ee.Filter.inList('COUNTYFP',list(map(lambda x:x[2:],fips))))
# geo = geo.filter(ee.Filter.eq('STATEFP',fips[:2]))
# geo = geo.filter(ee.Filter.eq('COUNTYFP',fips[2:]))
data = ee.ImageCollection('NASA/ORNL/DAYMET_V3')
data = data.filter(ee.Filter.date(pd.to_datetime(date), pd.to_datetime(date)+pd.DateOffset(months=1)-datetime.timedelta(days=1)))
data = data.select(['prcp','tmax','tmin'])
data = data.mean()
data = data.reduceRegion(
reducer=ee.Reducer.mean(),
geometry=geo,
scale = 10**5)
return(data)
#%%
corn = pd.read_csv('data/corn.csv')
corn['ANSI']=corn['ANSI'].apply(inttostr)
soy = pd.read_csv('data/soy.csv')
soy['ANSI']=soy['ANSI'].apply(inttostr)
wheat = pd.read_csv('data/wheat.csv')
wheat['ANSI']=wheat['ANSI'].apply(inttostr)
#%%
temp = pd.unique(corn['ANSI'].append(soy['ANSI']).append(wheat['ANSI']).values)
counties = ee.FeatureCollection("TIGER/2018/Counties")
counties = counties.filter(ee.Filter.inList('STATEFP',list(map(lambda x:x[:2],temp))))
counties = counties.filter(ee.Filter.inList('COUNTYFP',list(map(lambda x:x[2:],temp))))
counties = counties.getInfo()['features']
counties = pd.DataFrame(list(map(lambda x:x['properties'],counties)))
counties[['INTPTLON','INTPTLAT']]=counties[['INTPTLON','INTPTLAT']].applymap(float)
counties['fips']=counties['STATEFP']+counties['COUNTYFP']
#%%
soil = loadlith(temp[0])
soil.columns=[temp[0]]
for i,j in enumerate(temp[1:]):
hold = loadlith(j)
hold.columns=[j]
soil = soil.join(hold)
print(i/len(temp))
soil = soil.T.fillna(0)
#%%
hold = counties.set_index('fips')[['INTPTLAT','INTPTLON']].join(soil)#.dropna()
hold['Clust']=KMeans(100).fit(hold.values).labels_
hold.plot.scatter('INTPTLON','INTPTLAT',s=1,c='Clust')
soilclust = hold.reset_index()
soilclust.to_csv('data/soilclust.csv',index=None)
#%%
start = datetime.datetime.now()
lap = start
climate = {}
for clust in sorted(pd.unique(soilclust['Clust'])):
climate1 = ee.Dictionary()
for year in range(2000,2020):
climate2 = ee.Dictionary()
for month in range(1,13):
fips = soilclust['fips'][soilclust['Clust']==clust].values
fips= fips[:5]
climate2=climate2.set(str(month),loadclimate(fips,year,month))
climate1 = climate1.set(str(year),climate2)
climate[clust] = climate1.getInfo()
print(clust,datetime.datetime.now()-lap,datetime.datetime.now()-start)
lap = datetime.datetime.now()
#%%
temp = {}
temp1= {}
for clust in climate.keys():
temp[clust]=[]
for year in climate[clust].keys():
temp[clust].append(pd.DataFrame(climate[clust][year]).T)
temp[clust][-1].index = list(map(lambda x: year+'-'+x,temp[clust][-1].index))
temp1[clust]=temp[clust][0]
for i in temp[0][1:]:
temp1[clust]=temp1[clust].append(i)
temp1[clust].index=pd.to_datetime(temp1[clust].index)
temp1[clust]=temp1[clust].sort_index()
temp1[clust]=temp1[clust].stack().reset_index()
temp1[clust].columns=['Date','Measure',clust]
temp1[clust]=temp1[clust].set_index(['Date','Measure',])
temp2 = list(temp1.values())[0]
for i in list(temp1.values())[1:]:
temp2 = temp2.join(i)
temp2 = temp2.stack().reset_index()
temp2.columns=['Date','Measure','Clust','Val']
temp2 = temp2.set_index(['Date','Clust','Measure']).unstack()['Val']
temp2.reset_index().to_csv('data/climate.csv',index=None)
#%%