Commit

added the rest
nberr committed Feb 27, 2024
1 parent 814c379 commit 1ad53d0
Showing 2 changed files with 231 additions and 3 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/basic.yml
@@ -11,10 +11,10 @@ jobs:

    steps:
      - name: Checkout Repository
-       uses: actions/checkout@v2
+       uses: actions/checkout@v4

      - name: Set up Python
-       uses: actions/setup-python@v2
+       uses: actions/setup-python@v3
        with:
          python-version: '3.x' # Specify your desired Python version

230 changes: 229 additions & 1 deletion basic.py
@@ -8,6 +8,24 @@
from google.oauth2.credentials import Credentials
from googleapiclient.discovery import build

from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC

from bs4 import BeautifulSoup

PSA_USERNAME = os.environ['PSA_USERNAME']
PSA_PASSWORD = os.environ['PSA_PASSWORD']
# FTP credentials
FTP_HOST = os.environ['FTP_HOST']
FTP_USERNAME = os.environ['FTP_USERNAME']
FTP_PASSWORD = os.environ['FTP_PASSWORD']

CHROMEDRIVER_PATH = '/usr/lib/chromium-browser/chromedriver'
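
# note: all five credentials above must be present in the environment before the
# script starts (the workflow is presumably expected to inject them as secrets);
# os.environ[...] raises a KeyError if any is missing. The chromedriver path
# matches the chromium-browser package layout and may differ on other systems.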

###########################
# log into google account #
###########################
@@ -69,4 +87,214 @@

exit(0)

sys.stdout.write(f'{len(messages)} email(s) found with matching subject: {subject}.\n')

########################
# log into PSA account #
########################
attempts = 0
max_attempts = 5

# set options for browser
chrome_options = Options()
chrome_options.add_argument('--headless') # Run Chrome in headless mode
chrome_options.add_argument('--disable-gpu')
chrome_service = Service(CHROMEDRIVER_PATH)  # Specify the path to the chromedriver executable

# launch the browser
driver = webdriver.Chrome(service=chrome_service, options=chrome_options)
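
# note: Selenium 4.6+ can also locate a driver automatically via Selenium Manager;
# the explicit Service above pins the system chromedriver instead, which suits
# the headless CI environment this script appears to target.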

while attempts < max_attempts:

    sys.stdout.write(f'Attempt #{attempts+1} to sign into PSA account...')

    try:

        # navigate to PSA login
        driver.get('https://app.collectors.com/signin?b=PSA&r=http://www.psacard.com/myaccount?site%3Dpsa')

        # Wait until the email field is present
        wait = WebDriverWait(driver, 10)
        wait.until(EC.presence_of_element_located((By.ID, 'email')))

        # Find the input element with id="email" and enter the email address
        email_input = driver.find_element(By.ID, 'email')
        email_input.send_keys(PSA_USERNAME)

        # Find the button with type="submit" and click it
        submit_button = driver.find_element(By.CSS_SELECTOR, 'button[type="submit"]')
        submit_button.click()

        # Wait until the password field is present
        wait = WebDriverWait(driver, 10)  # Adjust the timeout as needed
        wait.until(EC.presence_of_element_located((By.ID, 'password')))

        # Find the element with id="password" and enter the password
        password_input = driver.find_element(By.ID, 'password')
        password_input.send_keys(PSA_PASSWORD)

        # Find the button with type="submit" and click it
        login_button = driver.find_element(By.CSS_SELECTOR, 'button[type="submit"]')
        login_button.click()

        # Wait until the account page has loaded
        wait = WebDriverWait(driver, 100)  # Adjust the timeout as needed
        wait.until(EC.title_is('PSA Collectibles Authentication and Grading Service'))

        sys.stdout.write('success!\n')
        break

    except Exception as e:

        sys.stdout.write(f'failed: {e}\n')
        attempts += 1

        # restart the browser so the next attempt begins with a fresh session
        driver.quit()
        driver = webdriver.Chrome(service=chrome_service, options=chrome_options)

if attempts == max_attempts:
    sys.stdout.write('Ran out of attempts...script will run again in 6 hours.\n')
    driver.quit()
    exit(1)
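
# past this point the driver holds an authenticated PSA session, which the
# per-email loop below reuses for every order page it visits.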

##################
# For each email #
##################
for index, message in enumerate(messages):

    sys.stdout.write(f'Working on email {index+1}...')

    ##########################################
    # Extract the link and go to the webpage #
    ##########################################
    msg = gmail_service.users().messages().get(userId='me', id=message['id'], format='full').execute()
    payload = msg['payload']

    # get the email body
    body = None

    if 'parts' in payload:
        for part in payload['parts']:
            if 'body' in part:
                body_data = part['body']
                if 'data' in body_data:
                    body = base64.urlsafe_b64decode(body_data['data']).decode()
    elif 'body' in payload:
        body_data = payload['body']
        if 'data' in body_data:
            body = base64.urlsafe_b64decode(body_data['data']).decode()
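
    # note: Gmail returns body data base64url-encoded; when several parts carry
    # data, the loop above keeps only the last one, which is assumed to be fine
    # for these single-link notification emails.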

    # extract the link
    link = None

    if body:
        link_pattern = r'(https://www\.psacard\.com/myaccount/myorder\S*)'
        match = re.search(link_pattern, body)
        if match:
            link = match.group(0)
            link = link[:-1]
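
    # note: \S* in the pattern is greedy up to the next whitespace, so the match
    # can drag along a trailing delimiter from the email body; the [:-1] above
    # assumes exactly one such character needs stripping.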

    # navigate to the URL
    if link:
        driver.get(link)

        # Wait until the page is loaded
        wait = WebDriverWait(driver, 100)  # Adjust the timeout as needed
        wait.until(EC.title_contains("Order"))

        # Get the page source
        page_source = driver.page_source

        # extract the table
        soup = BeautifulSoup(page_source, 'html.parser')
        table = soup.find('table')
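
        # note: soup.find('table') returns only the first table on the page, or
        # None if there is none; in the latter case str(table) below would write
        # the literal string 'None' into the output file.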

        # Find the table header row
        #header_row = soup.find('thead').find('tr')
        #
        ## indices for columns that will be removed
        #line_column_index = 0
        #images_column_index = 5
        #type_column_index = 6

        ## Remove the columns from the header row
        #header_row.find_all('th')[type_column_index].extract()
        #header_row.find_all('th')[images_column_index].extract()
        #header_row.find_all('th')[line_column_index].extract()
        #
        ## Find the table body rows
        #body_rows = soup.find('tbody').find_all('tr')
        #
        ## Remove the columns from each body row
        #for row in body_rows:
        #    row.find_all('td')[type_column_index].extract()
        #    row.find_all('td')[images_column_index].extract()
        #    row.find_all('td')[line_column_index].extract()
        #
        #    # Remove the <a> tag from the "Cert #" columns in each body row
        #    cert_column = row.find('td', {'data-title': 'Cert #'})
        #    if cert_column is not None and cert_column.a:
        #        cert_column.a.unwrap()
        #
        ## adjust the table
        #tables = soup.find_all('table')

        ## Iterate through each table
        #for t in tables:
        #    # Find all rows in the table
        #    rows = t.find_all('tr')

        #    # Iterate through each row
        #    for row in rows:
        #        # Find all cells in the row
        #        cells = row.find_all(['td', 'th'])

        #        # Check if the row has at least 3 cells
        #        if len(cells) >= 3:
        #            # Set the width of the 3rd column to 50%
        #            cells[3]['style'] = 'width: 50%;'

        #    # Add black border lines between rows
        #    t['style'] = 'border-collapse: collapse; border: 1px solid black;'
        # Use regex to find the number after "Submission" in the page title
        submission_number = 0

        match = re.search(r"Submission (\d+)", str(soup.find('title')))
        if match:
            submission_number = match.group(1)
            # print(f'sub number: {submission_number}')

        html_out = str(table)
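
        # note: if the title regex does not match, submission_number stays 0 and
        # the block below writes the table to '0.txt', so consecutive unmatched
        # orders would overwrite each other.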

        # Save the page source as a file
        # filename = 'webpage.html'
        # with open(filename, 'w', encoding='utf-8') as file:
        #     file.write(soup.prettify())

        # create the file
        doc_title = str(submission_number) + '.txt'
        sub_folder = 'Submissions/'

        if not os.path.exists(sub_folder):
            os.makedirs(sub_folder)

        doc_path = os.path.join(sub_folder, doc_title)

        with open(doc_path, 'w') as file:
            # write the content
            file.write(html_out)
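
        # note: despite the .txt extension, the file holds the raw HTML <table>
        # markup captured above, presumably for a later formatting or upload step.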

    # delete the email
    gmail_service.users().messages().delete(userId='me', id=message['id']).execute()
    sys.stdout.write('done!\n')

sys.stdout.write('Deleting remaining emails...')

results = gmail_service.users().messages().list(userId='me').execute()
messages = results.get('messages', [])

# move each remaining email to the trash
for message in messages:
    gmail_service.users().messages().trash(userId='me', id=message['id']).execute()
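
# note: messages().trash() only moves mail to the Trash folder, unlike the
# permanent messages().delete() used for processed emails above; trash also
# works with the narrower gmail.modify OAuth scope, while delete requires
# full mailbox access.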

sys.stdout.write('done!\n')

driver.quit()
