-
Notifications
You must be signed in to change notification settings - Fork 0
/
homeshop18mobiles.py
62 lines (51 loc) · 1.75 KB
/
homeshop18mobiles.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import sys
import urllib2
from bs4 import BeautifulSoup
import time
def findTagValueInSoup(soup, tagName, attrsDict):
    """Return the ASCII-only, stripped text of a matching element, or "NA".

    Args:
        soup: Object exposing ``find(tagName, attrsDict)`` (callers in this
            file pass BeautifulSoup tags).
        tagName: Name of the tag to look up, e.g. ``'p'``.
        attrsDict: Attribute filter handed straight to ``soup.find``.

    Returns:
        The element's text with every non-ASCII character dropped and
        surrounding whitespace stripped, or the string ``"NA"`` when
        ``soup.find`` returns nothing.
    """
    element = soup.find(tagName, attrsDict)
    if element is None:
        return "NA"

    rawText = element.get_text()
    try:
        # Byte strings are interpreted as UTF-8 before the ASCII filtering.
        text = rawText.decode('utf8')
    except (AttributeError, UnicodeError):
        # The value is already text (no usable ``decode``), so use it as-is.
        # Only these failures are absorbed; KeyboardInterrupt and other
        # unrelated errors now propagate instead of being silently swallowed.
        text = rawText
    return text.encode("ascii", "ignore").strip()
def _writeProductRow(eachProduct):
    """Write one "url<TAB>title<TAB>price" line for a product div to f_out."""
    title = findTagValueInSoup(eachProduct, 'p', {"class": "product_title"})
    link = eachProduct.find('a', attrs={"class": "srch_rslt_img productTitle"})
    href = link.get('href') if link is not None else None
    if href is None:
        # A product without a usable anchor used to raise TypeError and abort
        # the whole page after some rows were already written; emit the same
        # "NA" placeholder that findTagValueInSoup uses for missing values.
        productURL = "NA"
    else:
        # Drop the query string so only the canonical product path is kept.
        productURL = "http://www.homeshop18.com" + href.split('?')[0]
    price = findTagValueInSoup(eachProduct, 'b', {})
    f_out.write('\t'.join([productURL, title, price]) + '\n')


def homeshop18(homeshop18ProductURL):
    """Fetch one listing page and append a row per product to f_out.

    Args:
        homeshop18ProductURL: URL of the listing page; surrounding whitespace
            is stripped before the request is made.

    Rows are written to the module-level ``f_out`` file object as
    tab-separated ``url, title, price`` lines. Divs carrying the
    "product_div_last" class string are written first, followed by the
    regular product divs, matching the original output order.

    Errors from ``urllib2.urlopen`` or from parsing propagate to the caller.
    """
    response = urllib2.urlopen(homeshop18ProductURL.strip())
    try:
        # The page is parsed while the response is still open.
        # NOTE(review): no parser is named, so bs4 picks whichever is
        # installed - consider pinning one for reproducible results.
        soup = BeautifulSoup(response)
    finally:
        # Always release the HTTP connection, even if parsing fails.
        response.close()

    productDivClasses = (
        "box product_div product_div_last key_height",
        "box product_div key_height",
    )
    for divClass in productDivClasses:
        for eachProduct in soup.find_all("div", attrs={"class": divClass}):
            _writeProductRow(eachProduct)
# Crawl parameters for the mobile-phones category listing.
PAGE_COUNT = 67           # number of listing pages requested
START_STEP = 24           # increment of the "start:" offset between pages
                          # (presumably the number of products per page)
RETRY_DELAY_SECONDS = 5   # pause before a retry and after a failed retry

# f_out stays a module-level name because homeshop18() writes to it directly;
# the with-block guarantees the file is flushed and closed even if the crawl
# is interrupted.
with open("mobileshomeshop18.csv", 'w') as f_out:
    for i in range(PAGE_COUNT):
        url = ('http://www.homeshop18.com/mobile-phones/category:14569/start:%d/') % (START_STEP * i)
        try:
            homeshop18(url)
        except Exception:
            # First attempt failed: wait, then retry the same page once.
            # Catching Exception (not a bare except) lets Ctrl-C stop the run.
            time.sleep(RETRY_DELAY_SECONDS)
            try:
                homeshop18(url)
            except Exception:
                # Give up on this page; report its URL and move on.
                print(url)
                time.sleep(RETRY_DELAY_SECONDS)
        else:
            # Progress indicator: index of the page fetched on the first try.
            print(i)