-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: Add scrapping script to get icon list from font awesome website
- Loading branch information
1 parent
9ebce68
commit 7c1bcfc
Showing
4 changed files
with
89 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
# Scraping the icon list | ||
|
||
This script is used to scrape the list of icons, together with their categories, from the Font Awesome website in order to generate the iconList used for search queries. | ||
|
||
## Install | ||
1. Init a new virtual env | ||
```bash | ||
virtualenv-3.8 . | ||
``` | ||
2. Install requirements | ||
```bash | ||
bin/pip install -r requirements.txt | ||
``` | ||
3. Launch the script | ||
```bash | ||
bin/python main.py | ||
``` | ||
A JSON file is created containing the JS object for iconList.js | ||
|
||
TODO: Generate the JS file directly |
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
from selenium import webdriver | ||
from selenium.webdriver.chrome.options import Options | ||
from selenium.webdriver.common.by import By | ||
from selenium.webdriver.support.ui import WebDriverWait | ||
from selenium.webdriver.support import expected_conditions as EC | ||
from bs4 import BeautifulSoup | ||
|
||
import json | ||
|
||
# Font Awesome search page used as the scraping entry point.
# NOTE(review): the query string presumably restricts results to free,
# solid-style icons — confirm against the site.
FA_SEARCH_URL = "https://fontawesome.com/search?o=a&m=free&s=solid"

# Per-category URL template; fill it with FA_CAT_URL.format(id=<category token>).
FA_CAT_URL = f"{FA_SEARCH_URL}&c={{id}}"
|
||
def tokenize_title(title):
    """Convert a category title into the token used as the category id.

    Two titles have hard-coded tokens that do not follow the general rule:
    "Disaster + Crisis" maps to "disaster" and "Genders" maps to "gender".
    Every other title is lower-cased, then " + " and any remaining spaces
    are replaced with "-".
    """
    # Exact-match overrides, checked before any normalisation.
    overrides = {
        "Disaster + Crisis": "disaster",
        "Genders": "gender",
    }
    special = overrides.get(title)
    if special is not None:
        return special

    # " + " must be collapsed first so it yields a single dash, not three.
    return title.lower().replace(" + ", "-").replace(" ", "-")
|
||
def main():
    """Scrape Font Awesome icon names per category and write iconList.json.

    Opens FA_SEARCH_URL in headless Chrome, reads the category titles from
    the facets menu, then loads each category page (FA_CAT_URL) and collects
    the icon names found there. The result is a dict keyed by category token:

        {token: {"title": <category title>, "list": [{"name": <icon name>}, ...]}}

    which is dumped as JSON to ``iconList.json`` in the current directory.

    Raises whatever Selenium raises when a wait expires (10 s for the
    category menu, 20 s for each category page), and AttributeError if the
    expected elements are missing from the parsed page.
    """
    options = Options()
    options.add_argument('--headless')
    options.add_argument('--disable-gpu')
    driver = webdriver.Chrome(options=options)

    # Always shut the browser down: without the finally, a wait timeout or a
    # parsing error would skip driver.quit() and leave Chrome running.
    try:
        driver.get(FA_SEARCH_URL)

        # The category menu is rendered client-side; wait until it exists.
        WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CLASS_NAME, "wrap-icons-facet-input")))

        soup = BeautifulSoup(driver.page_source, "html.parser")

        categories = (
            soup.find("div", {"class": "wrap-icons-facets-menu"})
            .find("ul", {"class": "icons-facets-group-categories"})
            .find_all("li", {"class": "wrap-icons-facet-input"})
        )

        cat_list = [cat.find("span", {"class": "text-capitalize"}).string for cat in categories]

        icon_list = {
            tokenize_title(cat): {
                "title": cat,
                "list": [],
            }
            for cat in cat_list
        }

        total = len(cat_list)
        for count, cat in enumerate(cat_list, start=1):
            icon_token = tokenize_title(cat)
            driver.get(FA_CAT_URL.format(id=icon_token))
            print(f"Accessing {icon_token} page ({count}/{total})")
            # Wait for at least one icon tile before snapshotting the page.
            WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.CLASS_NAME, "wrap-icon")))
            soup = BeautifulSoup(driver.page_source, "html.parser")
            icons = soup.find_all("article", {"class": "wrap-icon"})
            icon_list[icon_token]["list"] = [
                {"name": icon.find("span", {"class": "icon-name"}).string}
                for icon in icons
            ]
    finally:
        driver.quit()

    with open('iconList.json', 'w', encoding='utf-8') as fp:
        json.dump(icon_list, fp)
|
||
|
||
# Run the scraper only when this file is executed as a script, so that
# importing the module does not start a browser.
if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
selenium==4.8.3 | ||
beautifulsoup4==4.12.0 |