-
Notifications
You must be signed in to change notification settings - Fork 0
/
inverted_index.h
53 lines (43 loc) · 1.58 KB
/
inverted_index.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
#pragma once
#pragma execution_character_set("UTF-8")
#ifndef INVERTED_INDEX_H_
#define INVERTED_INDEX_H_
#include "dictionary.h"
/* Shorthand for unsigned int; used below for document ids, hash codes and the
 * hashing prime.
 * NOTE(review): some platforms' system headers also declare a `uint` typedef;
 * verify this does not clash on the targets you build for. */
typedef unsigned int uint;
/* One entry in a word's singly linked list of documents: identifies a document
 * and stores the term-frequency weight recorded for it. */
typedef struct DOCNODE
{
uint docid; // unique id for this document
double tf; // normalized term frequency (normalization scheme is defined in the .c file, not here)
struct DOCNODE* pnext; // next document node in the list; presumably NULL at the tail -- confirm
}DOCNODE;
/* One indexed word. All documents that contain the word are stored in a linked
 * list (phead), sorted in ascending order of document id. */
typedef struct WORDNODE
{
char* pword; // the word text; NOTE(review): ownership/allocation is not visible in this header -- confirm who frees it
DOCNODE* phead; // pointer to the first document of the linked list
double idf; // inverse document frequency: log(N/(n_i+1)); presumably N = total documents, n_i = documents containing this word -- confirm
struct WORDNODE* pnext; // next word node; presumably chains words that share an INDEXITEM bucket -- confirm
}WORDNODE;
/* One slot of the index table: a hash code plus the head of a WORDNODE list. */
typedef struct INDEXITEM
{
uint hashcode; // hash code for words
struct WORDNODE* phead; // first WORDNODE stored in this slot; presumably NULL when the slot is empty -- confirm
}INDEXITEM;
/* The inverted index itself: a hashing prime and a pointer to its INDEXITEM
 * storage. */
typedef struct INDEXTABLE
{
uint prime_num; // prime number used for hashing
struct INDEXITEM* pindex; // presumably an array of prime_num INDEXITEM slots -- TODO confirm against inverted_index_initialize
}INDEXTABLE;
/* Returns a uint hash for the data at ptr (the pointee is not modified: it is
 * const-qualified). `lenth` is a misspelling of "length"; presumably the
 * number of bytes to hash -- confirm against the definition. */
uint inverted_index_hash(const void* ptr, int lenth);
/* Returns a uint derived from bound -- by its name, a prime number.
 * NOTE(review): whether the result is the nearest prime below or above bound
 * is not visible in this header; confirm before relying on it. Unlike the
 * other functions declared here, this name has no inverted_index_ prefix. */
uint find_prime(uint bound);
/* Creates a WORDNODE for the word src and returns a pointer to it. src itself
 * is not modified (const-qualified).
 * NOTE(review): presumably heap-allocates the node and copies src, returning
 * NULL on failure -- none of that is visible here; confirm in the definition. */
WORDNODE* inverted_index_new_wordnode(const char* src);
/* Creates a DOCNODE for the given document id and returns a pointer to it.
 * NOTE(review): doc_id is a signed int here while DOCNODE.docid is uint, so a
 * negative argument would convert to a large unsigned id.
 * Allocation and failure behaviour are not visible in this header -- confirm. */
DOCNODE* inverted_index_new_docnode(int doc_id);
/* Builds and returns an INDEXTABLE from a DICTIONARY (type declared in
 * dictionary.h); the dictionary is taken as read-only.
 * NOTE(review): presumably sizes the table from the dictionary and the caller
 * releases it with inverted_index_destroy_table -- confirm in the definition. */
INDEXTABLE* inverted_index_initialize(const DICTIONARY* pdict);
/* Inserts a single word into the index table ptable; pword is not modified
 * (const-qualified). No status is returned.
 * NOTE(review): handling of duplicate words and of allocation failure is not
 * visible here -- confirm. */
void inverted_index_insert_word(INDEXTABLE* ptable, const char* pword);
/* Inserts words reachable from the trie `root` into ptable. buf is a writable
 * scratch buffer (non-const) and depth an int position/level.
 * NOTE(review): presumably a recursive trie walk that builds each word in buf
 * with depth as the current length -- confirm, including the size buf must
 * have, in the definition. TRIETREE is presumably declared via dictionary.h. */
void inverted_index_insert_words(INDEXTABLE* ptable, const TRIETREE root, char* buf, int depth);
/* Inserts the contents of a DICTIONARY into ptable; the dictionary is taken as
 * read-only. No status is returned.
 * NOTE(review): presumably delegates to inverted_index_insert_words on the
 * dictionary's trie -- confirm. */
void inverted_index_insert_dictionary(INDEXTABLE* ptable, const DICTIONARY* pdict);
/* Destroys an index table. Takes the address of the caller's table pointer.
 * NOTE(review): presumably frees every node and sets *pptable to NULL, which
 * would explain the double pointer -- confirm in the definition. */
void inverted_index_destroy_table(INDEXTABLE** pptable);
/* Saves the index table to the file named outfilename; the table is taken as
 * read-only. Returns nothing, so I/O failures are not reported to the caller
 * through the return value.
 * NOTE(review): file format and error handling are not visible here. */
void inverted_index_save(const INDEXTABLE* ptable, const char* outfilename);
/* Loads an index table from the file named infilename and returns it.
 * NOTE(review): presumably reads the format written by inverted_index_save and
 * returns NULL on failure; the caller presumably owns the result -- confirm. */
INDEXTABLE* inverted_index_load(const char* infilename);
/* Prints the index table; the table is taken as read-only.
 * NOTE(review): output destination and format are not visible in this header
 * (presumably stdout) -- confirm. */
void inverted_index_print(const INDEXTABLE* ptable);
#endif