Skip to content

Commit

Permalink
continue for new w2v
Browse files Browse the repository at this point in the history
  • Loading branch information
liuzhiqiang authored and liuzhiqiang committed Oct 31, 2016
1 parent 1ccfe6a commit 710bcd2
Show file tree
Hide file tree
Showing 4 changed files with 86 additions and 5 deletions.
7 changes: 5 additions & 2 deletions w2v/doc2vec.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,12 @@ D2V * d2v_create(int argc, char * argv[]){
}

int d2v_init(D2V * d2v){
int i, k = d2v->dc->get_k(d2v->dc);
int i, k = d2v->dc->get_k(d2v->dc), t = d2v->dc->get_t(d2v->dc);
d2v->ds = tsd_load(d2v->dc->get_d(d2v->dc));
d2v->model = (Vec*)calloc(1, sizeof(Vec));
d2v->model->k = k;
d2v->model->t = 0;
if (d2v->dc->get_t(d2v->dc) == 2){
if (t == 2){
d2v->model->t = 1;
}
d2v->model->hbt = (int(*)[5])calloc(d2v->ds->v, sizeof(int[5]));
Expand All @@ -39,6 +39,9 @@ int d2v_init(D2V * d2v){
while (i-- > 0){
d2v->model->neu0[i] = ((rand() + 0.1) / (RAND_MAX + 0.1) - 0.5) / k;
}
if (t > 0){
vec_load_tree(d2v->model, d2v->ds, d2v->dc->get_o(d2v->dc), "dvector");
}
return 0;
}

Expand Down
77 changes: 76 additions & 1 deletion w2v/vec.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,11 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "str.h"
#include "vec.h"

#define MTDEPT 40
#define MLEN 8192

static int vec_cmp(const void *a, const void *b){
int c = ((int*)a)[1] - ((int*)b)[1];
Expand Down Expand Up @@ -78,7 +80,7 @@ void vec_learn_tree(Vec * vec, float * cw, float * de, int vid, double learn_rat
loss = learn_rate * (sb[s] - 1.0 / (1.0 + exp(-loss)));
if (fabs(loss) > 1e-9) for (t = 0; t < k; t++){
de[t] += loss * vec->neu1[st[s] * k + t];
if (t == 0){
if (vec->t == 0){
vec->neu1[st[s] * k + t] += loss * cw[t];
}
}
Expand Down Expand Up @@ -117,6 +119,79 @@ void vec_save_tree(Vec * vec, TSD * ds, char * outdir){
fclose(fp);
}

void vec_load_tree(Vec * vec, TSD * ds, char * outdir, char * leaff){
int v = ds->v, i;
char *string = NULL, *token = NULL;
char out[512] = {0};
char buf[MLEN] = {0};
FILE * fp = NULL;
sprintf(out, "%s/index", outdir);
if (NULL == (fp = fopen(out, "r"))){
return ;
}
if (v == 0){
while (NULL != fgets(buf, MLEN, fp)){
v += 1;
}
ds->v = v;
rewind(fp);
}
if (!vec->hbt){
vec->hbt = (int(*)[5])calloc(v, sizeof(int[5]));
}
if (!ds->idm){
ds->idm = (char(*)[KEY_SIZE])calloc(v, sizeof(char[KEY_SIZE]));
}
i = 0;
while (NULL != fgets(buf, MLEN, fp)){
string = trim(buf, 3);
token = strsep(&string, "\t");
strncpy(ds->idm[i], token, KEY_SIZE - 1);
vec->hbt[i][0] = atoi(strsep(&string, "\t"));
vec->hbt[i][1] = atoi(strsep(&string, "\t"));
vec->hbt[i][2] = atoi(strsep(&string, "\t"));
vec->hbt[i][3] = atoi(strsep(&string, "\t"));
vec->hbt[i][4] = i;
i += 1;
}
fclose(fp);
sprintf(out, "%s/noleaf", outdir);
if (NULL == (fp = fopen(out, "r"))){
return;
}
if (!vec->neu1){
vec->neu1 = (float*)calloc(v * vec->k, sizeof(float));
}
i = 0;
while (NULL != fgets(buf, MLEN, fp)){
string = trim(buf, 3);
while (NULL != (token = strsep(&string, "\t"))){
vec->neu1[i++] = atof(token);
}
}
fclose(fp);
sprintf(out, "%s/%s", outdir, leaff);
if (NULL == (fp = fopen(out, "r"))){
return;
}
v = 0;
while (NULL != fgets(buf, MLEN, fp)){
v += 1;
}
rewind(fp);
if (!vec->neu1){
vec->neu1 = (float*)calloc(v * vec->k, sizeof(float));
}
i = 0;
while (NULL != fgets(buf, MLEN, fp)){
string = trim(buf, 3);
while (NULL != (token = strsep(&string, "\t"))){
vec->neu1[i++] = atof(token);
}
}
fclose(fp);
}

void vec_free_tree(Vec * vec){
free(vec->hbt);
free(vec->neu0);
Expand Down
1 change: 1 addition & 0 deletions w2v/vec.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ typedef struct _vec {
void vec_build_tree(Vec * vec, int (*wc)[2], int n);
void vec_learn_tree(Vec * vec, float * cw, float * de, int vid, double learn_rate);
void vec_save_tree(Vec * vec, TSD * ds, char * outdir);
void vec_load_tree(Vec * vec, TSD * ds, char * outdir, char * leaff);
void vec_free_tree(Vec * vec);

#endif //VEC_H
6 changes: 4 additions & 2 deletions w2v/word2vec.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ W2V * w2v_create(int argc, char * argv[]){
}

int w2v_init(W2V * w2v){
int i, k = w2v->wc->get_k(w2v->wc);
int i, k = w2v->wc->get_k(w2v->wc), t = w2v->wc->get_t(w2v->wc);
w2v->ds = tsd_load(w2v->wc->get_d(w2v->wc));
w2v->model = (Vec*)calloc(1, sizeof(Vec));
w2v->model->k = k;
Expand All @@ -36,6 +36,9 @@ int w2v_init(W2V * w2v){
while (i-- > 0){
w2v->model->neu0[i] = ((rand() + 0.1) / (RAND_MAX + 0.1) - 0.5) / k;
}
if (t == 1){
vec_load_tree(w2v->model, w2v->ds, w2v->wc->get_o(w2v->wc), "vector");
}
return 0;
}

Expand Down Expand Up @@ -115,7 +118,6 @@ void w2v_free(W2V * w2v){
free(w2v);
}


int w2v_dsize (W2V * w2v){
return w2v->ds->d;
}
Expand Down

0 comments on commit 710bcd2

Please sign in to comment.