-
Notifications
You must be signed in to change notification settings - Fork 1
/
metadata_regression.m
75 lines (60 loc) · 1.98 KB
/
metadata_regression.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
% Clinical information regression model
%
% Data-loading stage: asks the user for the database directory, reads one
% record per header file through read_data, keeps only the recordings that
% pass the gestation threshold, and removes the feature columns that are
% excluded from the model.
clear all; close all; clc
% Initialize random seed for reproducibility
rng('default')
addpath(genpath('utils'))
% Select folder with data
myDir = uigetdir(pwd,'Open database directory');
if isequal(myDir,0)
    % uigetdir returns 0 when the dialog is cancelled or closed.
    error('metadata_regression:noDirectory', ...
        'No database directory was selected.');
end
headerFiles = dir(fullfile(myDir,'*.hea'));
if isempty(headerFiles)
    error('metadata_regression:noRecords', ...
        'No .hea header files were found in "%s".',myDir);
end
% Exclude recordings done before 26 weeks of gestation.
% The first feature is compared against 26*7, so it is presumably the
% gestational age in days - TODO confirm against read_data.
minGestation = 26*7;
% Number of records kept so far; also the row index of the last kept record.
count = 0;
for k = 1:length(headerFiles)
    % Read into temporaries first, so that an excluded record never lands
    % in X, y or names. Writing directly into row `count` and relying on
    % the next iteration to overwrite it would leave the last record in
    % place whenever that record is an excluded one.
    [features,targets,recordName] = read_data(k,myDir);
    if features(1) < minGestation
        continue
    end
    count = count+1;
    X(count,:) = features;
    y(count,:) = targets;
    names{count} = recordName;
end
if count == 0
    error('metadata_regression:noRecords', ...
        'No recording in "%s" passed the gestation threshold.',myDir);
end
% Remove diagnoses of hypertension, diabetes, placental_position,
% funneling, as explained in the paper.
X(:,[6,7,8,11])=[];
% Number of folds in each cross-validation partition
nFolds = 5;
% Number of times the partition is redrawn
nRepeats = 20;
% Index of the most recently written entry of y_hat_cell / y_test_cell
slot = 0;
%% Repeat cross-validation 20 times with different sampling.
for rep = 1:nRepeats
    % Regression with cross-validation: a new partition is drawn from the
    % first column of y on every repetition.
    folds = cvpartition(y(:,1),'KFold',nFolds);
    % Reset on every repetition; not read anywhere else in this script.
    y_test_cat = [];
    y_hat_cat = [];
    for fold = 1:nFolds
        trainMask = training(folds,fold);
        testMask = test(folds,fold);
        XTrain = X(trainMask,:);
        XTest = X(testMask,:);
        % Complete missing entries: every NaN is replaced by the mode of
        % its column, computed on the training rows only and reused for
        % the test rows.
        columnModes = mode(XTrain,1);
        trainFill = repmat(columnModes,size(XTrain,1),1);
        testFill = repmat(columnModes,size(XTest,1),1);
        trainGaps = isnan(XTrain);
        testGaps = isnan(XTest);
        XTrain(trainGaps) = trainFill(trainGaps);
        XTest(testGaps) = testFill(testGaps);
        % Fit a linear model on the second column of y and predict the
        % held-out rows.
        Mdl = fitlm(XTrain,y(trainMask,2));
        slot = slot+1;
        y_hat_cell{slot} = predict(Mdl,XTest);
        y_test_cell{slot} = y(testMask,2);
    end
end
%%
% Save data for analysis
% save('./results/roc_aucs/regression_metadata','y_test_cell','y_hat_cell','y');