Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Workaround for SSLError while downloading dataset #269

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions feature_selection/find_signature.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@
### remainder go into training)
### feature matrices changed to dense representations for compatibility with
### classifier functions in versions 0.15.2 and earlier
from sklearn import cross_validation
features_train, features_test, labels_train, labels_test = cross_validation.train_test_split(word_data, authors, test_size=0.1, random_state=42)
from sklearn import model_selection
features_train, features_test, labels_train, labels_test = model_selection.train_test_split(word_data, authors, test_size=0.1, random_state=42)

from sklearn.feature_extraction.text import TfidfVectorizer
vectorizer = TfidfVectorizer(sublinear_tf=True, max_df=0.5,
Expand Down
2 changes: 1 addition & 1 deletion outliers/outlier_removal_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
### and n_columns is the number of features
ages = numpy.reshape( numpy.array(ages), (len(ages), 1))
net_worths = numpy.reshape( numpy.array(net_worths), (len(net_worths), 1))
from sklearn.cross_validation import train_test_split
from sklearn.model_selection import train_test_split
ages_train, ages_test, net_worths_train, net_worths_test = train_test_split(ages, net_worths, test_size=0.1, random_state=42)

### fill in a regression here! Name the regression object reg so that
Expand Down
11 changes: 7 additions & 4 deletions pca/eigenfaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,12 @@
import pylab as pl
import numpy as np

from sklearn.cross_validation import train_test_split
from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_lfw_people
from sklearn.grid_search import GridSearchCV
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.decomposition import RandomizedPCA
from sklearn.decomposition import PCA
from sklearn.svm import SVC

# Display progress logs on stdout
Expand Down Expand Up @@ -70,7 +70,10 @@

print "Extracting the top %d eigenfaces from %d faces" % (n_components, X_train.shape[0])
t0 = time()
pca = RandomizedPCA(n_components=n_components, whiten=True).fit(X_train)
pca = PCA(
n_components=n_components,
svd_solver='randomized',
whiten=True).fit(X_train)
print "done in %0.3fs" % (time() - t0)

eigenfaces = pca.components_.reshape((n_components, h, w))
Expand Down
17 changes: 16 additions & 1 deletion tools/startup.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,22 @@
print "download will complete at about 423 MB"
import urllib
url = "https://www.cs.cmu.edu/~./enron/enron_mail_20150507.tar.gz"
urllib.urlretrieve(url, filename="../enron_mail_20150507.tar.gz")
### Download the Enron archive. If the first attempt fails with the OpenSSL
### "dh key too small" handshake error, retry once with the TLS context
### restricted to a single ECDHE cipher suite; any other IOError is re-raised
### unchanged.
filename = "../enron_mail_20150507.tar.gz"
try:
    urllib.urlretrieve(url, filename=filename)
except IOError as socket_error:
    ### Match on the OpenSSL reason code only. The full repr() also embeds
    ### the interpreter's C source location (e.g. "_ssl.c:727") and a u''
    ### prefix, both of which vary between Python builds, so comparing the
    ### whole string would make the workaround miss on other installations.
    if "DH_KEY_TOO_SMALL" not in repr(socket_error):
        ### bare raise keeps the original traceback intact
        raise
    import ssl
    cipher = "ECDHE-RSA-AES128-GCM-SHA256"
    context = ssl.create_default_context()
    ### only offer this suite, so the handshake should not depend on the
    ### server's Diffie-Hellman parameters that triggered the error
    context.set_ciphers(cipher)
    urllib.urlretrieve(url, filename=filename, context=context)
print "download complete!"


Expand Down