# foldingathome.yml
# https://github.com/jkataja/cfn-foldingathome
# CloudFormation template: Folding@home client on an Auto Scaling group of
# GPU spot instances.
# The format version is quoted so generic YAML loaders keep it a string
# instead of converting it to a date.
AWSTemplateFormatVersion: '2010-09-09'
Description: |
  Run Folding@home on spot instances to help find cure for COVID-19.
# Region -> AMI lookup, read by the launch template through
# !FindInMap [Ubuntu1804, <region>, ImageId].
Mappings:
  # Ubuntu 18.04 LTS AMI IDs as of 2020-06-18 from https://cloud-images.ubuntu.com/locator/ec2/
  Ubuntu1804:
    af-south-1:
      ImageId: 'ami-079652134905bcbad'
    ap-east-1:
      ImageId: 'ami-c42464b5'
    ap-northeast-1:
      ImageId: 'ami-0cfa3caed4b487e77'
    ap-northeast-2:
      ImageId: 'ami-0d777f54156eae7d9'
    ap-northeast-3:
      ImageId: 'ami-056ee91a6ed694f5d'
    ap-south-1:
      ImageId: 'ami-02d55cb47e83a99a0'
    ap-southeast-1:
      ImageId: 'ami-063e3af9d2cc7fe94'
    ap-southeast-2:
      ImageId: 'ami-0bc49f9283d686bab'
    ca-central-1:
      ImageId: 'ami-065ba2b6b298ed80f'
    cn-north-1:
      ImageId: 'ami-0071f6f4df15863cc'
    cn-northwest-1:
      ImageId: 'ami-0a22b8776bb32836b'
    eu-central-1:
      ImageId: 'ami-0d359437d1756caa8'
    eu-north-1:
      ImageId: 'ami-0f920d75f0ce2c4bb'
    eu-south-1:
      ImageId: 'ami-08bb6fa4a2d8676d4'
    eu-west-1:
      ImageId: 'ami-089cc16f7f08c4457'
    eu-west-2:
      ImageId: 'ami-00f6a0c18edb19300'
    eu-west-3:
      ImageId: 'ami-0e11cbb34015ff725'
    me-south-1:
      ImageId: 'ami-0ca656ad4cf917e1f'
    sa-east-1:
      ImageId: 'ami-0faf2c48fc9c8f966'
    us-east-1:
      ImageId: 'ami-0ac80df6eff0e70b5'
    us-east-2:
      ImageId: 'ami-0a63f96e85105c6d3'
    us-west-1:
      ImageId: 'ami-0d705db840ec5f0c5'
    us-west-2:
      ImageId: 'ami-053bc2e89490c5ab7'
# Stack inputs. The Folding@home values are substituted into the client's
# config.xml by the launch template user data; the remaining ones place and
# size the fleet.
Parameters:
  Anonymous:
    Description: Folding@home fold anonymously (default true for anonymous)
    Type: String
    Default: 'true'
    AllowedValues:
      - 'true'
      - 'false'
  FoldingAtHomeTeam:
    Description: Folding@home team number (default 0 for no team)
    Type: Number
    Default: 0
    MinValue: 0
  FoldingAtHomeUser:
    Description: Folding@home user name (default Anonymous for anonymous user)
    Type: String
    Default: 'Anonymous'
  FoldingAtHomePasskey:
    Description: Folding@home pass key (default empty for anonymous user)
    Type: String
    Default: ''
    # Keep the passkey out of console/API parameter listings. It is still
    # written into the instance user data, so this is not full secrecy.
    NoEcho: true
  InstanceCount:
    Description: Scale-out count of g4dn.xlarge instances to run the Folding@home client
    Type: Number
    Default: 1
    # Used as MaxSize/DesiredCapacity and as the expected signal count, so a
    # negative value can never be valid.
    MinValue: 0
  KeyName:
    Description: SSH key name for ubuntu user
    Type: AWS::EC2::KeyPair::KeyName
  Subnets:
    Description: Subnets in VPC (for example the default VPC subnets 172.31.0.0/20, 172.31.16.0/20 and 172.31.32.0/20)
    Type: List<AWS::EC2::Subnet::Id>
  VpcId:
    Description: VPC for the stack (for example the default VPC 172.31.0.0/16)
    Type: AWS::EC2::VPC::Id
  SSHLocation:
    # IPv4 CIDR: four octets 0-255 followed by a prefix length 0-32.
    AllowedPattern: "^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])(\\/([0-9]|[1-2][0-9]|3[0-2]))$"
    # Message now states the prefix range the pattern actually accepts.
    ConstraintDescription: CIDR block parameter must be in the form x.x.x.x/0-32
    Default: '0.0.0.0/0'
    Description: Network allowed to SSH to instances in public subnet (default 0.0.0.0/0 to allow from everywhere)
    Type: String
Resources:
  # Fleet of spot instances launched from LaunchTemplate across the given
  # subnets; sized by the InstanceCount parameter.
  AutoScalingGroup:
    Type: AWS::AutoScaling::AutoScalingGroup
    Properties:
      LaunchTemplate:
        LaunchTemplateId: !Ref LaunchTemplate
        Version: !GetAtt LaunchTemplate.LatestVersionNumber
      MaxSize: !Ref InstanceCount
      DesiredCapacity: !Ref InstanceCount
      MinSize: '0'
      VPCZoneIdentifier: !Ref Subnets
      Tags:
        - Key: Name
          Value: Folding@home
          PropagateAtLaunch: true
    # Creation completes only after InstanceCount success signals arrive;
    # the user-data script sends one with cfn-signal as its final step.
    CreationPolicy:
      ResourceSignal:
        Count: !Ref InstanceCount
        Timeout: 'PT10M'  # 10 minutes

  # Instance definition: Ubuntu 18.04 AMI for the current region on a
  # one-time spot g4dn.xlarge, bootstrapped by the user-data script below.
  LaunchTemplate:
    Type: AWS::EC2::LaunchTemplate
    Properties:
      LaunchTemplateData:
        BlockDeviceMappings:
          - DeviceName: '/dev/sda1'
            Ebs:
              DeleteOnTermination: true
              VolumeSize: 20
              VolumeType: 'gp2'
        IamInstanceProfile:
          Name: !Ref InstanceProfile
        ImageId: !FindInMap [Ubuntu1804, !Ref 'AWS::Region', ImageId]
        InstanceType: 'g4dn.xlarge'
        InstanceMarketOptions:
          MarketType: 'spot'
          SpotOptions:
            SpotInstanceType: 'one-time'
        EbsOptimized: true
        KeyName: !Ref KeyName
        Monitoring:
          Enabled: true
        SecurityGroupIds:
          - !Ref SecurityGroup
        UserData:
          # !Sub replaces ${Param} and ${AWS::...} references before the
          # script reaches the instance; the shell only sees $VAR and $(...)
          # forms.
          Fn::Base64: !Sub |
            #!/bin/bash -ex
            exec > >(tee /var/log/user-data.log|logger -t user-data -s 2>/dev/console) 2>&1
            # Use instance store for Folding@home
            # NOTE(review): assumes the instance-store volume enumerates as
            # /dev/nvme0n1; NVMe device order is not guaranteed - confirm.
            mkfs -t ext4 /dev/nvme0n1
            mkdir -p /var/lib/fahclient
            echo "/dev/nvme0n1 /var/lib/fahclient ext4 defaults,discard 0 0" >>/etc/fstab
            mount -a
            chown ubuntu:ubuntu /var/lib/fahclient
            # Install NVidia CUDA drivers and Python
            wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-ubuntu1804.pin
            mv cuda-ubuntu1804.pin /etc/apt/preferences.d/cuda-repository-pin-600
            # 3bf863cc is the current CUDA repository signing key (replaces the retired 7fa2af80)
            apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub
            add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/ /"
            apt-get update
            apt-get -y install cuda python-minimal python-pip
            # Optimize GPU settings
            nvidia-smi -ac 5001,1590
            # Install cfn-tools and awscli for health signalling
            pip install aws-cfn-bootstrap awscli
            # Install Folding@home
            wget https://download.foldingathome.org/releases/public/release/fahclient/debian-stable-64bit/v7.6/fahclient_7.6.13_amd64.deb
            dpkg-deb -x fahclient_7.6.13_amd64.deb /
            mkdir -p /etc/fahclient
            cat<<END >/etc/fahclient/config.xml
            <config>
            <fold-anon v="${Anonymous}"/>
            <user v="${FoldingAtHomeUser}"/>
            <team v="${FoldingAtHomeTeam}"/>
            <passkey v="${FoldingAtHomePasskey}"/>
            <power v="full"/>
            <gpu v="true"/>
            <slot id="0" type="CPU"/>
            <slot id="1" type="GPU"/>
            <allow>127.0.0.1</allow>
            <web-allow>127.0.0.1</web-allow>
            </config>
            END
            cat<<END >/var/lib/fahclient/GPUs.txt
            0x10de:0x1eb8:2:7:TU104GL [Tesla T4]
            END
            # Hook run after the service stops: marks this instance Unhealthy unless
            # the service result was "success". The heredoc is unquoted, so the
            # instance id is resolved now and written into the hook as a literal.
            cat<<END >/usr/local/bin/asg-unhealthy
            #!/bin/bash
            [ "\$SERVICE_RESULT" == "success" ] && exit 0
            exec /usr/local/bin/aws autoscaling set-instance-health --instance-id $(curl http://169.254.169.254/latest/meta-data/instance-id) --health-status "Unhealthy" --region ${AWS::Region}
            END
            chmod a+rx /usr/local/bin/asg-unhealthy
            cat<<END >/etc/systemd/system/fahclient.service
            [Unit]
            Description=Folding@home client
            After=network.target
            [Service]
            User=ubuntu
            WorkingDirectory=/var/lib/fahclient
            ExecStart=/usr/bin/FAHClient --config /etc/fahclient/config.xml
            ExecStopPost=/usr/local/bin/asg-unhealthy
            [Install]
            WantedBy=multi-user.target
            END
            systemctl enable fahclient
            systemctl start fahclient
            # send success signal
            /usr/local/bin/cfn-signal --success true --stack ${AWS::StackName} --resource AutoScalingGroup --region ${AWS::Region}

  # Inbound: SSH from SSHLocation only. Outbound: restricted to the ports
  # listed below.
  SecurityGroup:
    Type: AWS::EC2::SecurityGroup
    Properties:
      GroupDescription: Folding@home spot instance security group
      VpcId: !Ref VpcId
      SecurityGroupIngress:
        - IpProtocol: tcp
          FromPort: 22
          ToPort: 22
          CidrIp: !Ref SSHLocation
      SecurityGroupEgress:
        # DNS
        - IpProtocol: udp
          FromPort: 53
          ToPort: 53
          CidrIp: '0.0.0.0/0'
        # NTP
        - IpProtocol: udp
          FromPort: 123
          ToPort: 123
          CidrIp: '0.0.0.0/0'
        # HTTP
        - IpProtocol: tcp
          FromPort: 80
          ToPort: 80
          CidrIp: '0.0.0.0/0'
        # NOTE(review): presumably Folding@home server traffic - confirm
        - IpProtocol: tcp
          FromPort: 8080
          ToPort: 8080
          CidrIp: '0.0.0.0/0'
        # HTTPS
        - IpProtocol: tcp
          FromPort: 443
          ToPort: 443
          CidrIp: '0.0.0.0/0'

  # Attaches CustomHealthCheckRole to the instances.
  InstanceProfile:
    Type: AWS::IAM::InstanceProfile
    Properties:
      Path: /
      Roles:
        - !Ref CustomHealthCheckRole

  # Role assumed by the EC2 instances. Its only permission is
  # autoscaling:SetInstanceHealth on Auto Scaling groups tagged with this
  # stack's name, which the asg-unhealthy hook calls.
  CustomHealthCheckRole:
    Type: AWS::IAM::Role
    Properties:
      AssumeRolePolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Action:
              - 'sts:AssumeRole'
            Effect: 'Allow'
            Principal:
              Service:
                - 'ec2.amazonaws.com'
      Policies:
        - PolicyName: 'CustomHealthCheck'
          PolicyDocument:
            Version: '2012-10-17'
            Statement:
              - Effect: 'Allow'
                Action: 'autoscaling:SetInstanceHealth'
                # AWS::Partition keeps the ARN valid outside the standard
                # partition (the AMI map includes the China regions).
                Resource: !Sub 'arn:${AWS::Partition}:autoscaling:${AWS::Region}:${AWS::AccountId}:autoScalingGroup:*'
                Condition:
                  StringEquals:
                    'aws:ResourceTag/aws:cloudformation:stack-name': !Ref 'AWS::StackName'