-
Notifications
You must be signed in to change notification settings - Fork 0
/
mainAnalysis.php
152 lines (123 loc) · 5.48 KB
/
mainAnalysis.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
<?php
// mainAnalysis.php: Runs morphological analysis and named entity extraction, then stores the resulting data.
// Requires PHP 7 or newer, the MeCab morphological analyzer, and a PHP binding that provides \MeCab\Tagger.
// Please install them on your system before running this script.
// Base directory of the project files and base URL used to build entity URIs.
$workDir = "/your_directory/ogiNikki/";
$url = 'https://your_domain/ogiNikki/';
// ogiMain CSV data (https://www.dl.saga-u.ac.jp/ogiNikki): newer data can be downloaded there.
$recordFile = $workDir."ogiMain_sample.csv";
$recordBase = file($recordFile, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
if ($recordBase === false) {
    // Without the source records there is nothing to analyse; stop with a clear message
    // instead of carrying on with warnings and writing an empty index.
    throw new \RuntimeException("Cannot read record file: ".$recordFile);
}
///////////////////////////////////////////
// Select the column that holds the title text handed to MeCab.
// In ogiMain_sample.csv the "title" field is the 5th column, so its zero-based index is 4.
$titleColumn = 4; // Change this to the actual column index of your data.
$record = array();
foreach ($recordBase as $recBVal) {
    // str_getcsv() honours quoted fields, so a comma inside a quoted value no longer shifts
    // the columns (a plain explode(',') would split it). The enclosure and escape characters
    // are passed explicitly and equal PHP's historical defaults.
    $rbval = str_getcsv($recBVal, ',', '"', '\\');
    // Keep exactly one entry per CSV line, even when the column is missing, so that the
    // record numbers derived from the array index stay aligned with the CSV lines.
    $record[] = $rbval[$titleColumn] ?? '';
}
#print_r($record); exit;
// To start the analysis from record number 1, set $rnBase to 0.
$rnBase = 0;
$sentence = array();   // $sentence[$rn][$snum]   = list of space-separated parts of one field
$mtextarray = array(); // $mtextarray[$rn][$snum][$osnum] = MeCab output lines for one part
$mdatavals = array();  // raw MeCab output of every part, in processing order
// The tagger does not depend on any loop variable, so it is built once instead of once per part.
$mecab = new \MeCab\Tagger();
$recordCount = isset($record) ? count($record) : 0;
for ($rn = $rnBase; $rn < $recordCount; $rn++) {
    // Split the record on commas into fields, then split every field on spaces.
    foreach (explode(',', $record[$rn]) as $val) {
        $sentence_pre = preg_replace('/ /', '@@', $val);
        // Drop a trailing separator and any newline so that no empty last part is produced.
        $sentence_pre = mb_ereg_replace('[@]{2}$|\n', '', $sentence_pre);
        $sentence[$rn][] = explode('@@', $sentence_pre);
    }
    // MeCab
    foreach ($sentence[$rn] as $snum => $parts) {
        foreach ($parts as $osnum => $part) {
            $mtext = $mecab->parse(trim($part));
            // Remove only a line that consists of the "EOS" marker. A plain str_replace()
            // would also delete the letters "EOS" from inside the analysed text itself.
            $mtext = preg_replace('/^EOS$/m', '', $mtext);
            $mdatavals[] = $mtext;
            $mtextarray[$rn][$snum][$osnum] = explode("\n", trim($mtext));
        }
    }
}
#print_r($sentence); exit;
#print_r($mtextarray); exit;
#print count($mtextarray); exit;
#print_r($mdatavals); exit;
/////////////////////////////////////////
// Storing data in a MeCab analyzed file if you want.
#$mdatafile = $workDir."mtextdata".date('YmdHis').'.txt';
#file_put_contents($mdatafile, implode("\n", $mdatavals));
#readfile($mdatafile); exit;
// Load the user dictionaries. Every non-empty line of a file is kept as one entry.
// An unreadable file already triggers PHP's own warning inside file(); it is then treated as
// an empty dictionary so the membership tests below stay well-defined (a boolean false here
// would otherwise make the lookups fail or match every token).
$loadDic = function ($name) use ($workDir) {
    $lines = file($workDir."userDic/".$name.".csv", FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
    return $lines === false ? array() : $lines;
};
$jinmeiF = $loadDic('jinmei');
$roleF = $loadDic('role');
$placeF = $loadDic('place');
$dateF = $loadDic('date');
$eventF = $loadDic('event');
$termsF = $loadDic('terms');
$quF = $loadDic('quantity');

// Flipped copies give exact (strict string) membership tests in constant time.
// NOTE(review): a token surface is compared against the WHOLE dictionary line, as before.
// If these CSV files carry more than one column, only the first column should be indexed — confirm.
$roleSet = array_flip($roleF);
$placeSet = array_flip($placeF);
$dateSet = array_flip($dateF);
$eventSet = array_flip($eventF);
$quSet = array_flip($quF);

// One list of index lines per entity category.
$toki_tango = array();
$qu_tango = array();
$terms_tango = array();
$event_tango = array();
$place_tango = array();
$jinmei_tango = array();
$role_tango = array();

// Walk every record ($rn), field ($snum), part ($osnum) and token ($j) produced by MeCab.
// The previous loop was bounded by count() of a variable that is never defined, so it either
// raised a TypeError (PHP 8) or never ran (PHP 7), and it looked at field 0 only.
foreach ($mtextarray as $rn => $fields) {
    foreach ($fields as $snum => $parts) {
        foreach ($parts as $osnum => $tokens) {
            $uri = $url.($rn + 1)."-".$snum."-".$osnum."-";
            foreach ($tokens as $j => $line) {
                // A MeCab output line is "surface<TAB>features".
                $tangoarray = explode("\t", $line);
                $surface = $tangoarray[0];
                $feature = $tangoarray[1] ?? '';
                if ($surface === '') {
                    continue; // empty output line: nothing to classify
                }
                // Shared tail of an index line:
                // ,record,field,part,tokenFrom,tokenTo,surface,uri
                $tail = ",".($rn + 1).",".$snum.",".$osnum.",".$j.",".$j.",".$surface.",".$uri.$j."-".$j;

                $inRole = isset($roleSet[$surface]);
                $inEvent = isset($eventSet[$surface]);

                // Person: tagged as a personal name and listed neither as a role nor as an event.
                // The two "not listed" tests were joined with ||, which is true unless the word is
                // in BOTH lists; && is assumed to be the intended exclusion.
                if (mb_strpos($feature, "_JINMEI") !== false && !$inRole && !$inEvent) {
                    $jinmei_tango[] = "Person".$tail;
                }
                // Role
                if (mb_strpos($feature, "_ROLE") !== false || $inRole) {
                    $role_tango[] = "Role".$tail;
                }
                // Place
                if (mb_strpos($feature, "_PLACE") !== false || isset($placeSet[$surface])) {
                    $place_tango[] = "Place".$tail;
                }
                // Date
                if (mb_strpos($feature, "_DATE") !== false || isset($dateSet[$surface])) {
                    $toki_tango[] = "Date".$tail;
                }
                // Event
                if (mb_strpos($feature, "_EVENT") !== false || $inEvent) {
                    $event_tango[] = "Event".$tail;
                }
                // Terms (recognised by tag only; no dictionary lookup)
                if (mb_strpos($feature, "_TERMS") !== false) {
                    $terms_tango[] = "Terms".$tail;
                }
                // Quantity
                if (mb_strpos($feature, "_QUANTITY") !== false || isset($quSet[$surface])) {
                    $qu_tango[] = "Quantity".$tail;
                }
            }
        }
    }
}
// Saving NEE data in a file.
// All category lists are merged into one list of index lines.
$data = array_merge($event_tango, $toki_tango, $place_tango, $jinmei_tango, $terms_tango, $role_tango, $qu_tango);
// Natural ordering compares embedded numbers by value, so record 2 sorts before record 10.
sort($data, SORT_NATURAL);
// Drop duplicate lines; the first occurrence of each line is kept.
$data2 = array_unique($data, SORT_STRING);
#print_r($data2); exit;
// The timestamp in the name keeps the results of separate runs apart.
$indexfile = $workDir."oginikki".date('YmdHis').'.index';
if (file_put_contents($indexfile, implode("\n", $data2)) === false) {
    // Report a failed write explicitly instead of going on to read a file that does not exist.
    throw new \RuntimeException("Cannot write index file: ".$indexfile);
}
// Echo the stored index so the result is visible right away.
readfile($indexfile);
// Use $indexfile on your database or tools!
?>