Skip to content

Commit

Permalink
Merge pull request #13 from techaddict/master
Browse files (browse the repository at this point in the history)
Minor Typo candiate_words -> candidate_words
  • Loading branch information
codelucas committed Jan 20, 2014
2 parents 58b1578 + 616aa83 commit 42e0f27
Showing 1 changed file with 7 additions and 7 deletions.
14 changes: 7 additions & 7 deletions newspaper/text.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -75,18 +75,18 @@ def remove_punctuation(self, content):
content = content.encode('utf-8')
return content.translate(self.TRANS_TABLE, string.punctuation)

- def candiate_words(self, stripped_input):
+ def candidate_words(self, stripped_input):
return stripped_input.split(' ')

def get_stopword_count(self, content):
if not content:
return WordStats()
ws = WordStats()
stripped_input = self.remove_punctuation(content)
- candiate_words = self.candiate_words(stripped_input)
+ candidate_words = self.candidate_words(stripped_input)
overlapping_stopwords = []
c = 0
- for w in candiate_words:
+ for w in candidate_words:
c += 1
if w.lower() in self.STOP_WORDS:
overlapping_stopwords.append(w.lower())
Expand All @@ -104,7 +104,7 @@ class StopWordsChinese(StopWords):
def __init__(self, language='zh'):
super(StopWordsChinese, self).__init__(language='zh')

- def candiate_words(self, stripped_input):
+ def candidate_words(self, stripped_input):
# jieba builds a tree that takes a while. avoid building
# this tree if we don't use the chinese language
from .packages import jieba
Expand All @@ -122,7 +122,7 @@ def __init__(self, language='ar'):
def remove_punctuation(self, content):
return content

- def candiate_words(self, stripped_input):
+ def candidate_words(self, stripped_input):
import nltk
s = nltk.stem.isri.ISRIStemmer()
words = []
Expand All @@ -143,10 +143,10 @@ def get_stopword_count(self, content):
return WordStats()
ws = WordStats()
stripped_input = self.remove_punctuation(content)
- candiate_words = self.candiate_words(stripped_input)
+ candidate_words = self.candidate_words(stripped_input)
overlapping_stopwords = []
c = 0
- for w in candiate_words:
+ for w in candidate_words:
c += 1
for stop_word in self.STOP_WORDS:
overlapping_stopwords.append(stop_word)
Expand Down

0 comments on commit 42e0f27

Please sign in to comment.