Skip to content

Commit

Permalink
feat: Add scrapping script to get icon list from font awesome website
Browse files Browse the repository at this point in the history
  • Loading branch information
jimbiscuit committed Mar 30, 2023
1 parent 9ebce68 commit 7c1bcfc
Show file tree
Hide file tree
Showing 4 changed files with 89 additions and 0 deletions.
20 changes: 20 additions & 0 deletions scrapping/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Scraping icon list

This script is used to scrape the icon list, with each icon's category, from the Font Awesome website and generate the iconList used for search queries.

## Install
1. Init a new virtual env
```bash
virtualenv-3.8 .
```
2. Install requirements
```bash
bin/pip install -r requirements.txt
```
3. Launch the script
```bash
bin/python main.py
```
A json file is created with the js object for iconList.js

TODO: Generate the JS file directly
1 change: 1 addition & 0 deletions scrapping/iconList.json

Large diffs are not rendered by default.

66 changes: 66 additions & 0 deletions scrapping/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup

import json

# Font Awesome search page filtered to free, solid-style icons ("m=free", "s=solid"),
# sorted alphabetically ("o=a").
FA_SEARCH_URL = "https://fontawesome.com/search?o=a&m=free&s=solid"
# Same search narrowed to one category; {id} is the tokenized category slug.
FA_CAT_URL = FA_SEARCH_URL+"&c={id}"

def tokenize_title(title):
    """Convert a Font Awesome category title into its URL slug.

    A few categories use an irregular slug on the website, so those are
    special-cased; every other title is lowercased with " + " and spaces
    collapsed to hyphens (e.g. "Alphabet + Numbers" -> "alphabet-numbers").
    """
    irregular_slugs = {
        "Disaster + Crisis": "disaster",
        "Genders": "gender",
    }
    if title in irregular_slugs:
        return irregular_slugs[title]

    return title.lower().replace(" + ", "-").replace(" ", "-")

def main():
    """Scrape the Font Awesome search pages and write iconList.json.

    Loads the main search page to discover every icon category, then
    fetches each category's result page and collects the icon names.
    The resulting mapping {slug: {"title": ..., "list": [{"name": ...}]}}
    is dumped to ``iconList.json`` in the current directory.
    """
    options = Options()
    options.add_argument('--headless')
    options.add_argument('--disable-gpu')
    driver = webdriver.Chrome(options=options)
    # try/finally guarantees the headless browser is quit even when a wait
    # times out or the page structure changes mid-scrape (the original code
    # leaked the Chrome process on any exception).
    try:
        driver.get(FA_SEARCH_URL)

        # The page is rendered client-side; wait until the category facet
        # list exists before reading page_source.
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CLASS_NAME, "wrap-icons-facet-input"))
        )

        soup = BeautifulSoup(driver.page_source, "html.parser")

        categories = (
            soup.find("div", {"class": "wrap-icons-facets-menu"})
            .find("ul", {"class": "icons-facets-group-categories"})
            .find_all("li", {"class": "wrap-icons-facet-input"})
        )

        cat_list = [cat.find("span", {"class": "text-capitalize"}).string for cat in categories]

        icon_list = {
            tokenize_title(cat): {
                "title": cat,
                "list": []
            }
            for cat in cat_list
        }

        for count, cat in enumerate(cat_list):
            icon_token = tokenize_title(cat)
            driver.get(FA_CAT_URL.format(id=icon_token))
            print(f"Accessing {icon_token} page ({count+1}/{len(cat_list)})")
            # Category pages render more slowly; allow a longer wait.
            WebDriverWait(driver, 20).until(
                EC.presence_of_element_located((By.CLASS_NAME, "wrap-icon"))
            )
            soup = BeautifulSoup(driver.page_source, "html.parser")
            icons = soup.find_all("article", {"class": "wrap-icon"})
            icon_list[icon_token]["list"] = [
                {"name": icon.find("span", {"class": "icon-name"}).string}
                for icon in icons
            ]
    finally:
        driver.quit()

    with open('iconList.json', 'w') as fp:
        json.dump(icon_list, fp)


if __name__ == "__main__":
    main()
2 changes: 2 additions & 0 deletions scrapping/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
selenium==4.8.3
beautifulsoup4==4.12.0

0 comments on commit 7c1bcfc

Please sign in to comment.