-
Notifications
You must be signed in to change notification settings - Fork 0
/
importES.js
267 lines (224 loc) · 7.52 KB
/
importES.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
//////////////////////////////////////////////////
//Read:Parse:Write and index in ES (importES.js)//
//////////////////////////////////////////////////
////////////////
//Contributors//
////////////////
///////////////////////////////////////////////
// NAME DATE CHANGE
// Dillon Dragomir 06-03-2017 Initialized folder
// Dillon Dragomir 06-04-2017 Fixed functionality with sending bulk requests to Elasticsearch
///////////////////////////////////////////////
////////////////
//DEPENDENCIES//
////////////////
// Require dependencies
var elasticsearch = require('elasticsearch'),
fs = require('fs'),
xlsxj = require("xlsx-to-json");
/////////////
//VARIABLES//
/////////////
// Run configuration. These values name the input workbooks
// ("csv/<player>_<fileName>.xlsx") and are stamped onto every exported row.
// The date tag is called gameDate rather than Date so the built-in Date
// constructor is not shadowed inside this module; rows still receive it
// under the key "Date".
var gameDate = "20190806",
    Upload_time = 4, // suffix of the intermediate "json/<player>-<Upload_time>.json" files
    Player,
    GameType = "Sample",
    DataType = "Example",
    fileName = gameDate + "_" + GameType + "_" + DataType;
var globalPlayerInt,
    idSum = 0; // running document-id offset consumed by sendES

// Number of outstanding units of work that must finish before indexing starts.
// It starts at 1 because the dispatch loop itself holds one slot; that slot is
// released after the loop, so the counter cannot reach zero while files are
// still being scheduled (even if a conversion were to complete synchronously).
var pendingConversions = 1;

// Completion callback for one unit of work. Once every scheduled workbook has
// been converted and written (or has failed), the JSON files are indexed.
// Any error argument is ignored here: failures were already logged upstream.
function conversionFinished() {
    pendingConversions -= 1;
    if (pendingConversions === 0) {
        afterRetrieval();
    }
}

// Schedule a conversion for every player number 1..22 whose workbook exists.
for (var loop = 1; loop < 23; loop++) {
    // Player ids below 10 are zero-padded strings ("01".."09"); 10..22 stay numbers.
    Player = loop < 10 ? "0" + loop : loop;
    var updatedFileName = Player + "_" + fileName;
    // Make sure the file exists before running data parsing
    if (fs.existsSync("csv/" + updatedFileName + ".xlsx")) {
        pendingConversions += 1;
        read_file(Player, updatedFileName, conversionFinished);
    } else {
        console.log(updatedFileName + " does not exist.");
    }
}
// Release the loop's own slot. Indexing starts here only if nothing is pending;
// otherwise it starts from the last conversion's completion callback.
conversionFinished();

// Serialize rows to "json/<L>-<Up>.json".
//   L    - player id used in the file name
//   Up   - upload-time suffix used in the file name
//   JS   - value to serialize with JSON.stringify
//   done - optional callback(err), invoked once the write has finished or failed
function write_file(L, Up, JS, done) {
    fs.writeFile("json/" + L + "-" + Up + ".json", JSON.stringify(JS), function (err) {
        if (err) {
            console.log(err);
        }
        if (typeof done === "function") {
            done(err);
        }
    });
}

// Convert "csv/<uFN>.xlsx" to rows with xlsx-to-json and pass them to add_data.
//   PlayerR - player id attached to every row
//   uFN     - workbook base name (no directory, no extension)
//   done    - optional callback(err), invoked when this file is fully processed
//             or when the conversion reports an error
function read_file(PlayerR, uFN, done) {
    xlsxj({
        input: "csv/" + uFN + ".xlsx",
        output: null // no file output from the converter; rows arrive in the callback
    }, function (err, result) {
        if (err) {
            console.error(err);
            // Still report completion so one bad workbook cannot stall indexing.
            if (typeof done === "function") {
                done(err);
            }
            return;
        }
        add_data(PlayerR, Upload_time, result, done);
    });
}

// Stamp every row with the run metadata (Date, Player, GameType, DataType),
// then write the rows to disk. The rows are modified in place.
//   Pl       - player id
//   Ut       - upload-time suffix forwarded to write_file
//   json_add - array of row objects produced by the converter
//   done     - optional callback(err) forwarded to write_file
function add_data(Pl, Ut, json_add, done) {
    for (var i = 0; i < json_add.length; i++) {
        json_add[i].Date = gameDate;
        json_add[i].Player = Pl;
        json_add[i].GameType = GameType;
        json_add[i].DataType = DataType;
    }
    write_file(Pl, Ut, json_add, done);
}
// Index every exported player file into Elasticsearch.
// For player numbers 1..22 this looks for "json/<player>-<Upload_time>.json",
// parses it, and hands the rows to sendES together with the player id.
// A missing or unparsable file is logged and skipped so the remaining players
// are still processed.
function afterRetrieval() {
    for (var i = 1; i < 23; i++) {
        // Zero-based index of the player currently being processed (module-level state).
        globalPlayerInt = i - 1;
        // Add a leading zero when the player number is less than 10
        var Player = i < 10 ? "0" + i : i;
        var jsonPath = "json/" + Player + "-" + Upload_time + ".json";
        if (!fs.existsSync(jsonPath)) {
            // Report the path that was actually checked.
            console.log(jsonPath + " does not exist.");
            continue;
        }
        var pubs;
        try {
            pubs = JSON.parse(fs.readFileSync(jsonPath, "utf8"));
        } catch (err) {
            // One malformed file must not abort the whole indexing run.
            console.error("Could not load " + jsonPath + ": " + err.message);
            continue;
        }
        // Function call that sends data to ES
        sendES(pubs, Player);
    }
}
// Build the "timestamp" string for one row from its Date and Time fields.
// The slicing is positional: a row counts as PM only when the characters of
// Time from index 9 onward are exactly "PM". For PM rows the first character
// is read as the hour and 12 is added; the character at index 1 is dropped and
// the seven characters from index 2 are appended. Every other row keeps the
// first seven characters of Time unchanged.
// NOTE(review): this assumes a one-digit hour and a fixed-width Time string;
// two-digit hours and the 12 AM / 12 PM cases are not handled — confirm against
// the spreadsheet export format before relying on the resulting timestamps.
function buildRowTimestamp(row) {
    var time = row.Time;
    if (time.slice(9, 19) === "PM") {
        // Add 12 to make military time
        return row.Date + " " + (parseInt(time.slice(0, 1), 10) + 12) + time.slice(2, 9);
    }
    return row.Date + " " + time.slice(0, 7);
}

// MAKES BULK REQUEST
// Send one player's rows to Elasticsearch with a single bulk call.
//   pubs   - array of row objects; each row gains a "timestamp" property
//            (the rows are modified in place)
//   Player - player id, used only in log messages
// Documents go to index "women_soccer" with _type set to the module-level
// fileName. Their _id values are idSum, idSum + 1, ...; idSum is then advanced
// by the number of rows so the next call continues the sequence.
function sendES(pubs, Player) {
    // Skip the request when there is nothing to index. This also keeps idSum
    // numeric if the parsed file turns out not to be an array.
    if (!Array.isArray(pubs) || pubs.length === 0) {
        console.log("Player " + Player + " has no rows to ingest.");
        return;
    }
    // VERIFY CLIENT IS ON LOCALHOST:9200
    var client = new elasticsearch.Client({
        host: 'localhost:9200'
        //log: 'trace'
    });
    // The bulk body alternates an action line and the document it applies to.
    var bulk_request = [];
    for (var j = 0; j < pubs.length; j++) {
        var row = pubs[j];
        bulk_request.push({
            index: {
                _index: "women_soccer",
                _type: fileName,
                _id: j + idSum
            }
        });
        row.timestamp = buildRowTimestamp(row);
        bulk_request.push(row);
    }
    client.bulk({
        body: bulk_request
    }, function (error, response) {
        if (error) {
            console.error(error);
            return;
        }
        // A bulk call can succeed as a request while individual items fail;
        // the response then carries errors: true.
        if (response && response.errors) {
            console.error("Player " + Player + " bulk request finished with item-level errors.");
            return;
        }
        console.log("Player " + Player + " data ingested.");
    });
    idSum = idSum + pubs.length;
    // No explicit cleanup: bulk_request and client are locals and become
    // unreachable when this call and its callback finish. `delete` on a
    // variable does nothing in sloppy mode and is a SyntaxError in strict
    // mode, so it is not used here.
}
/*
// USE FOR REALTIME INDEXING OF DATA
//Create ES client and upload json objects
pubs = JSON.parse(fs.readFileSync('json' + '/app10.json')); // name of my first file to parse
//forms = JSON.parse(fs.readFileSync(__dirname + '/forms.json')); // and the second set
//console.log(pubs.length);
//var rounds = Math.floor(pubs.length/30000);
//for(var r = 0; r <= 2; r++) {
//console.log(r);
//var mover = 30000*rounds;
var client = new elasticsearch.Client({ // default is fine for me, change as you see fit
host: 'localhost:9200'
//log: 'trace'
});
//send 30,000 documents at a time (matches the loop bound below)
for (var i = 0; i < 30000 ; i++ ) {
//if(pubs.length <= i+1){
// break; //break at end of calls
//}
client.create({
index: "server_data_1.0.5", // name your index
type: "app10", // describe the data thats getting created
id: i,// incremet ID every iteration - I already sorted mine but not a requirement
//timestamp: "2017-02-05'T'12:20:20",
body: pubs[i] // *** THIS ASSUMES YOUR DATA FILE IS FORMATTED LIKE SO: [{prop: val, prop2: val2}, {prop:...}, {prop:...}] - I converted mine from a CSV so pubs[i] is the current object {prop:..., prop2:...}
}, function(error, response) {
if (error) {
console.error(error);
return;
}
else {
//console.log("importing data!!"); I don't recommend this but I like having my console flooded with stuff. It looks cool. Like I'm compiling a kernel really fast.
}
});
}
*/
/*
USE FOR CSV
var Converter = require("csvtojson").Converter;
// create a new converter object
var converter = new Converter({});
// call the fromFile function which takes in the path to your
// csv file as well as a callback function
var json;
//FILEPATH FOR READ
converter.fromFile("csv/01_20150805_Practice1_Summary.xlsx",function(err,result){
// if an error has occurred then handle it
if(err){
console.log("An Error Has Occured");
console.log(err);
}
// create a variable called json and store
// the result of the conversion
json = result;
//FILEPATH FOR WRITE
fs.writeFile('json/01_20150805_Practice1_Summary.json', JSON.stringify(json), function (err) {
if (err) return console.log(err);
console.log('Saved');
});
// log our json to verify it has worked
});
delete converter;
*/