diff --git a/app/wsmodules/DataAnalyser.py b/app/wsmodules/DataAnalyser.py
new file mode 100755
index 0000000..290791e
--- /dev/null
+++ b/app/wsmodules/DataAnalyser.py
@@ -0,0 +1,92 @@
+#!/usr/bin/env python3
+"""DataAnalyser creates statistical text segments and images for the report.
+
+DataAnalyser provides functionality to extract statistical data segments
+and create statistical images from a data frame and a Postgres database.
+
+Module requires:
+    * cleaned-sorted-df.csv - contains scraped data
+
+Module creates:
+    * daily_room_type_stats.txt
+
+
+Todo:
+    * [ ] Create str segments (most of the algorithms are in jupyter)
+        - [ ] room types %
+        - [ ] house floors
+        - [ ] apt locations
+        - [ ] sqm size ranges for each
+        - [ ] sqm price ranges for each
+    * [ ] Create images based on DF
+        - [ ] gen_image(data_frame, 'Size_sqm', 'Price_in_eur') - created, but df is not filtered by room count = 1
+        - [ ] gen_image('double_room_sqm_prices.png')
+        - [ ] gen_image('triple_room_sqm_prices.png')
+        - [ ] gen_image('quad_room_sqm_prices.png')
+        - [ ] gen_image('all_room_sqm_prices.png')
+
+    * Need an interface to connect to the DB, extract the historic dict and save it to df and csv
+
+"""
+import pandas as pd
+
+
+class DataFrameAnalyzer:
+
+    def __init__(self, df_file_name: str):
+        self.df_file_name = df_file_name
+
+    def analyze_df_room_types(self, file) -> None:
+        pass
+
+    def analyze_df_house_types(self, file) -> None:
+        pass
+
+    def analyze_df_apt_loc_types(self, file) -> None:
+        pass
+
+    def gen_image(self, data_frame: pd.DataFrame, xclmn: str, yclmn: str) -> None:
+        """Generate a scatter plot with x and y axes taken from data frame
+        column values, include a title and save to a *.png file."""
+        img_title = 'All room sqm size to price relationships'
+        #file_name = '{}_{}.png'.format(xclmn, yclmn)
+        file_name = 'all_room_sqm_prices.png'
+        ax = data_frame.plot.scatter(
+            x=xclmn, y=yclmn, s=100, title=img_title, grid=True)
+        fig = ax.get_figure()
+        fig.savefig(file_name)
+
+
+class DBAnalyzer:
+    pass
+
+
+def main():
+    """Module entry point: run daily and monthly analytics."""
+    run_daily_analytics()
+    run_monthly_analytics()
+
+
+def run_daily_analytics() -> None:
+    """Generate daily statistics segments and images from the scraped data."""
+    data_frame = pd.read_csv('cleaned-sorted-df.csv')
+    dfa = DataFrameAnalyzer('cleaned-sorted-df.csv')
+    #dfa.analyze_df_room_types('daily_room_stats.txt')
+    #dfa.analyze_df_house_types('daily_house_stats.txt')
+    #dfa.analyze_df_apt_loc_types('daily_apt_loc_stats.txt')
+    #dfa.gen_image('double_room_sqm_prices.png')
+    #dfa.gen_image('triple_room_sqm_prices.png')
+    #dfa.gen_image('quad_room_sqm_prices.png')
+    dfa.gen_image(data_frame, 'Size_sqm', 'Price_in_eur')
+
+
+def run_monthly_analytics() -> None:
+    """Generate monthly statistics (not implemented yet)."""
+    pass
+
+
+main()
diff --git a/app/wsmodules/Report.py b/app/wsmodules/Report.py
new file mode 100644
index 0000000..5b073c4
--- /dev/null
+++ b/app/wsmodules/Report.py
@@ -0,0 +1,49 @@
+"""Report - a thin wrapper around FPDF for composing PDF reports."""
+from datetime import datetime
+from fpdf import FPDF
+
+
+class Report:
+    """Compose a PDF report from a header, text segments and images."""
+
+    def __init__(self, report_type, file_name):
+        self.report_type = report_type
+        self.file_name = file_name
+        self.pdf = FPDF()
+        self.pdf.add_page()
+        self.pdf.set_font('Arial', 'B', 16)
+        #self.insert_header(self.report_type)
+
+    def insert_header(self, report_type: str) -> None:
+        """Write the report title and creation date at the top of the page."""
+        todays_date = datetime.today().strftime('%Y-%m-%d %H:%M')
+        report_title = f"Ogre city apartments for sale {report_type}"
+        date_created = f"Report date: {todays_date}"
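+        # fpdf API: write(h, txt) renders txt at the current position with
+        # line height h (mm), and ln(h) starts a new line, so each
+        # write()/ln() pair below prints one row of the header.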
+        self.pdf.write(5, report_title)
+        self.pdf.ln(5)
+        self.pdf.write(5, date_created)
+        self.pdf.ln(5)
+
+    def insert_text_segment(self, text_lines: str) -> None:
+        """Write a text segment to the PDF, surrounded by line breaks."""
+        self.pdf.ln(5)  # line break
+        self.pdf.write(5, text_lines)
+        self.pdf.ln(5)
+
+    def insert_error_msg(self, msg: str) -> None:
+        """Write an error message to the PDF, surrounded by line breaks."""
+        self.pdf.ln(5)  # line break
+        self.pdf.write(5, msg)
+        self.pdf.ln(5)
+
+    def insert_images(self, images: list) -> None:
+        """Insert each image into the PDF at a fixed position and size."""
+        for image in images:
+            self.pdf.image(image, x=10, y=10, w=100, h=100)
+            self.pdf.ln(5)
+
+    def save_report(self, file_name: str) -> None:
+        """Write the composed PDF document to file_name."""
+        self.pdf.output(file_name, 'F')
diff --git a/app/wsmodules/analytics.py b/app/wsmodules/analytics.py
index 182dacc..cb3e4a2 100644
--- a/app/wsmodules/analytics.py
+++ b/app/wsmodules/analytics.py
@@ -14,7 +14,7 @@
 log = logging.getLogger('')
-log.setLevel(logging.DEBUG)
+log.setLevel(logging.INFO)
 fa_log_format = logging.Formatter("%(asctime)s [%(threadName)-12.12s] [%(levelname)-5.5s] : %(funcName)s: %(lineno)d: %(message)s")
 ch = logging.StreamHandler(sys.stdout)
 ch.setFormatter(fa_log_format)
diff --git a/app/wsmodules/data_formater_v14.py b/app/wsmodules/data_formater_v14.py
index b562637..dae158b 100644
--- a/app/wsmodules/data_formater_v14.py
+++ b/app/wsmodules/data_formater_v14.py
@@ -90,8 +90,10 @@ def create_oneline_report(source_file: str):
 
 
 def create_file_copy() -> None:
-    """Creates report file copy in data folder"""
-    copy_cmd = 'mv cleaned-sorted-df.csv data/'
+    """Creates a date-stamped copy of pandas_df.csv in the data folder"""
+    todays_date = datetime.today().strftime('%Y-%m-%d')
+    dest_file = 'pandas_df_' + todays_date + '.csv'
+    copy_cmd = 'cp pandas_df.csv data/' + dest_file
     if not os.path.exists('data'):
         os.makedirs('data')
     os.system(copy_cmd)
diff --git a/app/wsmodules/db_worker.py b/app/wsmodules/db_worker.py
index 4f787d3..2675018 100755
--- a/app/wsmodules/db_worker.py
+++ b/app/wsmodules/db_worker.py
@@ -25,6 +25,7 @@
 12.[] Check if report day by last x days count and generate report
 13.[] Write tests for db_worker module
 """
+import os
 import sys
 import logging
 from logging.handlers import RotatingFileHandler
@@ -78,16 +79,20 @@ def db_worker_main() -> None:
 
 def check_files(file_names: list) -> None:
     """Testing if file exists and can be opened"""
-    for f in file_names:
+    cwd = os.getcwd()
+    for file_name in file_names:
         try:
-            file = open(f, 'r')
+            logger.info(f'Checking if required module file {file_name} exists in {cwd}')
+            file = open(file_name, 'r')
         except IOError:
-            logger.error(f'There was an error opening the file {f} or file does not exist!')
+            logger.error(f'There was an error opening the file {file_name} or file does not exist!')
             sys.exit()
 
 
 def load_csv_to_df(csv_file_name: str):
-    """reads csv file and return pandas data frame"""
+    """reads csv file and returns pandas data frame"""
+    cwd = os.getcwd()
+    logger.info(f'Loading {csv_file_name} from directory {cwd}')
     df = pd.read_csv(csv_file_name)
     logger.info(f'Loaded {csv_file_name} file to pandas data frame in memory')
     return df
@@ -101,7 +106,8 @@ def extract_url_hashes_from_df(df_name) -> list:
     for full_url in urls:
         url_hash = extract_hash(full_url)
         url_hashes.append(url_hash)
-    logger.info(f'Extracted {len(url_hashes)} url hashes from pandas data frame')
+    logger.info(f"Extracted {len(url_hashes)} url hashes from today's scraped data")
+    logger.info(f"Extracted url hashes from today's scraped data: {url_hashes}")
     return url_hashes
 
@@ -114,7 +120,7 @@ def extract_hash(full_url: str) -> str:
     return url_hash
 
-def extract_listed_url_hashes_from_db():
+def extract_listed_url_hashes_from_db() -> list:
     """Iterate over all rows in listed_ads table and extract each url hash
     column value and return as list of hashes"""
     conn = None
@@ -130,7 +136,7 @@ def extract_listed_url_hashes_from_db():
             row = cur.fetchone()
         cur.close()
     except (Exception, psycopg2.DatabaseError) as error:
-        print(error)
+        logger.error(f'{error}')
     finally:
         if conn is not None:
             conn.close()
@@ -141,6 +147,8 @@
         clean_hash = clean_element.replace("(", "").replace(",", "")
         clean_hashes.append(clean_hash)
     logger.info(f'Extracted {len(clean_hashes)} hashes from database listed_ads table')
+    logger.info(f'Extracted clean hash count: {len(clean_hashes)}')
+    logger.info(f'Extracted clean hash list: {clean_hashes}')
     return clean_hashes
 
@@ -150,7 +158,7 @@ def compare_df_to_db_hashes(df_hashes: list, db_hashes: list) -> list:
     new_ads = []
     existing_ads = []
    removed_ads = []
-    logger.info(f'Comparing {len(df_hashes)} data frame hashes with {len(db_hashes)} listed table hashes')
+    logger.info(f"Comparing {len(df_hashes)} hashes from today's scraped data with {len(db_hashes)} DB listed_ads table hashes")
     for df_hash in df_hashes:
         if df_hash in db_hashes:
             existing_ads.append(df_hash)
@@ -163,12 +171,17 @@
     hash_categories.append(existing_ads)
     hash_categories.append(removed_ads)
     logger.info(f'Result {len(new_ads)} new, {len(existing_ads)} still_listed, {len(removed_ads)} to_remove hashes ')
+    logger.info(f"New hashes from today's scraped data: {new_ads}")
+    logger.info(f'Hashes already in DB listed_ads table: {existing_ads}')
+    logger.info(f'Hashes to move to DB removed_ads table: {removed_ads}')
     return hash_categories
 
 def extract_new_msg_data(df, new_msg_hashes: list) -> dict:
     """ Extract data from df and return as dict hash: (list column data for hash row)"""
     data_dict = {}
+    logger.info(f'new_msg_hashes count {len(new_msg_hashes)}, hashes: {new_msg_hashes}')
+    logger.info("Starting to extract new ads from today's scraped data frame in memory")
     for hash_str in new_msg_hashes:
         for index, row in df.iterrows():
             url = row['URL']
@@ -190,6 +203,9 @@
             row_data.append(days_count)
             if url_hash == hash_str:
                 data_dict[url_hash] = row_data
+    logger.info(f"Extracted new ad count from today's data frame: {len(data_dict)}")
+    for k, v in data_dict.items():
+        logger.info(f'{k} {v}')
     return data_dict
 
@@ -271,6 +287,8 @@ def insert_data_to_listed_table(data: dict) -> None:
                 days_listed))
         conn.commit()
         cur.close()
+        for k, v in data.items():
+            logger.info(f'{k} {v}')
     except (Exception, psycopg2.DatabaseError) as error:
         print(error)
     finally:
@@ -339,6 +357,7 @@ def extract_to_increment_msg_data(listed_url_hashes:list) -> list:
     conn = None
     to_increment_msg_data = {}
     try:
+        logger.info('Connecting to DB to fetch data from listed_ads table')
         params = config()
         conn = psycopg2.connect(**params)
         cur = conn.cursor()
@@ -361,12 +380,14 @@
             data_values.append(dlv)
             to_increment_msg_data[curr_row_hash] = data_values
         cur.close()
+        logger.info(f'Extracted data from listed_ads table for {len(to_increment_msg_data)} messages')
+        for k, v in to_increment_msg_data.items():
+            logger.info(f'{k} {v}')
     except (Exception, psycopg2.DatabaseError) as error:
         print(error)
     finally:
         if conn is not None:
             conn.close()
-    logger.info(f'Extracted data from listed_ads table for {len(to_increment_msg_data)} messages')
     return to_increment_msg_data
 
@@ -417,6 +438,8 @@ def insert_data_to_removed_table(data: dict) -> None:
                 days_listed))
         conn.commit()
         cur.close()
+        for k, v in data.items():
+            logger.info(f'{k} {v}')
     except (Exception, psycopg2.DatabaseError) as error:
         logger.error(error)
         print(error)
@@ -440,6 +463,7 @@ def delete_db_listed_table_rows(delisted_hashes: list) -> None:
         cur.execute(full_cmd)
         conn.commit()
         cur.close()
+        logger.info(f'Deleted ads with hashes: {delisted_hashes} from listed_ads table')
     except (Exception, psycopg2.DatabaseError) as error:
         print(error)
     finally:
@@ -462,6 +486,8 @@ def update_dlv_in_db_table(data: dict, todays_date: datetime) -> None:
         if correct_dlv == days_listed:
             pass
     logger.info(f'Updated days_listed value for {dlv_count} messages in listed_ads table')
+    for k, v in data.items():
+        logger.info(f'{k} {v}')
 
 def calc_valid_dlv(pub_date: str, todays_date: datetime) -> int:
diff --git a/app/wsmodules/df_cleaner.py b/app/wsmodules/df_cleaner.py
index 9a771c7..6ca7135 100644
--- a/app/wsmodules/df_cleaner.py
+++ b/app/wsmodules/df_cleaner.py
@@ -6,10 +6,10 @@
 3. Save as clean df in csv format
 """
 import pandas as pd
+import os
+from datetime import datetime
+
-print("Debug info: Starting data frame cleaning module ... ")
-# loading data to dataframe from csv file
-df_to_clean = pd.read_csv("pandas_df.csv")
 
 
 def clean_data_frame(df_name):
@@ -111,6 +111,8 @@ def create_email_body(clean_data_frame, file_name: str) -> None:
 
 def df_cleaner_main():
     """ Cleans df, sorts df by price in EUR, save to csv file """
+    print("Debug info: Starting data frame cleaning module ... ")
+    df_to_clean = pd.read_csv("pandas_df.csv")
     clean_df = clean_data_frame(df_to_clean)
     clean_sqm_col = clean_sqm_column(clean_df)
     clean_price_col = split_price_column(clean_sqm_col)
@@ -118,10 +120,19 @@
     sorted_df = clean_df.sort_values(by='Price_in_eur', ascending=True)
     sorted_df.to_csv("cleaned-sorted-df.csv")
     all_ads_df = pd.read_csv("cleaned-sorted-df.csv", index_col=False)
+    create_file_copy()
     create_email_body(all_ads_df, 'email_body_txt_m4.txt')
     print("Debug info: Completed dat_formater module ... ")
 
+def create_file_copy() -> None:
+    """Creates a date-stamped copy of cleaned-sorted-df.csv in the data folder"""
+    todays_date = datetime.today().strftime('%Y-%m-%d')
+    dest_file = 'cleaned-sorted-df-' + todays_date + '.csv'
+    copy_cmd = 'cp cleaned-sorted-df.csv data/' + dest_file
+    if not os.path.exists('data'):
+        os.makedirs('data')
+    os.system(copy_cmd)
+
-# Main module code driver
 df_cleaner_main()
diff --git a/app/wsmodules/gen_report.py b/app/wsmodules/gen_report.py
new file mode 100644
index 0000000..dca6e45
--- /dev/null
+++ b/app/wsmodules/gen_report.py
@@ -0,0 +1,37 @@
+from Report import Report
+
+
+segments = []
+images = []
+#segments = [ 'room_stats.txt',
+#             'house_stats.txt',
+#             'apt_loc_stats.txt' ]
+
+
+#images = [ 'single_room_sqm_prices.png',
+#           'double_room_sqm_prices.png',
+#           'triple_room_sqm_prices.png',
+#           'quad_room_sqm_prices.png',
+#           'all_room_sqm_prices.png' ]
+
+
+def gen_report(report_type: str, file_name: str, segments: list, images: list) -> None:
+    """
+    Generate report.
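+
+    Flow sketch: build a Report, write the header, then add each text
+    segment and image in order; when segments or images are missing, an
+    error message is written into the PDF instead, and the result is
+    saved to file_name.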
+ """ + report = Report(report_type, file_name) + report.insert_header('Daily Report') + if len(segments) > 0: + for segment in segments: + report.insert_text_segment(segment) + if len(segments) == 0: + report.insert_error_msg('No segments to display') +# if len(images) > 0: +# report.insert_images(images) + if len(images) == 0: + report.insert_error_msg('No images to display') + report.save_report('Ogre_daily.pdf') + + +gen_report('Daily', 'Ogre_daily.pdf', segments, images) + diff --git a/app/wsmodules/pdf_creator.py b/app/wsmodules/pdf_creator.py index 5ff9a15..24e2e43 100644 --- a/app/wsmodules/pdf_creator.py +++ b/app/wsmodules/pdf_creator.py @@ -48,6 +48,10 @@ def pdf_creator_main(): "Single room apartments", '1_rooms.png') create_png_plot(two_room_df, 'Size_sqm', "Price_in_eur", "Double room apartments", '2_rooms.png') + create_png_plot(three_room_df, 'Size_sqm', "Price_in_eur", + "Three room apartments", '3_rooms.png') + create_png_plot(four_room_df, 'Size_sqm', "Price_in_eur", + "Four room apartments", '4_rooms.png') # continue work from here @@ -143,7 +147,7 @@ def create_pdf(data_frame, title: str, date: str, file_to_save: str) -> None: def create_pdf_report(text_lines: list, msg_txt_lines: list) -> None: """ This is draft function to test ability to write to create and write pdf file """ # library help https://pyfpdf.readthedocs.io/en/latest/reference/image/index.html - report_title = "Ogre city apartments for sale" + report_title = "Ogre city apartments for sale listed today" todays_date = datetime.today().strftime('%Y-%m-%d %H:%M') # creating pdf object instance @@ -165,20 +169,22 @@ def create_pdf_report(text_lines: list, msg_txt_lines: list) -> None: pdf.write(5, str_line) pdf.ln(5) - # pdf.image("test.png", 20,10, 150) # inserts png to pdf - pdf.ln(10) # ads new lines - pdf.add_page() # adds new page + pdf.ln(10) + pdf.add_page() test_save_df_to_png() # calling function to generate png from df - pdf.ln(10) # ads new lines - pdf.ln(10) # ads new lines - # pdf.image("test.png", 20,10, 150) # inserts png to pdf - pdf.image("1_rooms.png", 20,10, 150) # inserts png to pdf - pdf.add_page() # adds new page - pdf.image("2_rooms.png", 20,10, 150) # inserts png to pdf - pdf.add_page() # adds new page - pdf.image("1-4_rooms.png", 20,10, 150) # inserts png to pdf - pdf.ln(10) # ads new lines + pdf.ln(10) + pdf.ln(10) + pdf.image("1_rooms.png", 20,10, 150) + pdf.add_page() + pdf.image("2_rooms.png", 20,10, 150) + pdf.add_page() + pdf.image("3_rooms.png", 20,10, 150) + pdf.add_page() + pdf.image("4_rooms.png", 20,10, 150) + pdf.add_page() + pdf.image("1-4_rooms.png", 20,10, 150) + pdf.ln(10) pdf.output(name="Ogre_city_report.pdf") # generate pdf files diff --git a/app/wsmodules/run_analisys.py b/app/wsmodules/run_analisys.py new file mode 100644 index 0000000..6d41bf2 --- /dev/null +++ b/app/wsmodules/run_analisys.py @@ -0,0 +1,45 @@ +""" module docstring """ + +import pandas as pd + +data_frame = pd.read_csv('cleaned-sorted-df.csv') + +#segments = [ 'room_stats.txt', +# 'house_stats.txt', +# 'apt_loc_stats.txt' ] + + +#images = [ 'single_room_sqm_prices.png', +# 'double_room_sqm_prices.png', +# 'triple_room_sqm_prices.png', +# 'quad_room_sqm_prices.png', +# 'all_room_sqm_prices.png' ] +data_frames = [ ] + + +def run_analisys(): + for data_frame in data_frames: + analyze_data('room_stats', 'room_stats.txt', data_frame) + analyze_data('house_stats', 'house_stats.txt', data_frame) + analyze_data('apt_loc_stats', 'room_stats.txt', data_frame) + gen_image(data_frame, 'Price', 'Sqm') + + 
+
+
+def analyze_data(segment_type: str, file_name: str,
+                 data_frame: pd.DataFrame) -> None:
+    """ TODO """
+    pass
+
+
+def gen_image(data_frame: pd.DataFrame, xclmn: str, yclmn: str) -> None:
+    """Generate a scatter plot with x and y axes taken from data frame
+    column values, include a title and save to a *.png file."""
+    img_title = 'FIXME'
+    file_name = '{}_{}.png'.format(xclmn, yclmn)
+    ax = data_frame.plot.scatter(
+        x=xclmn, y=yclmn, s=100, title=img_title, grid=True)
+    fig = ax.get_figure()
+    fig.savefig(file_name)
+
+
+run_analisys()
diff --git a/app/wsmodules/sendgrid_mailer.py b/app/wsmodules/sendgrid_mailer.py
index 6f70707..c11cb55 100644
--- a/app/wsmodules/sendgrid_mailer.py
+++ b/app/wsmodules/sendgrid_mailer.py
@@ -10,33 +10,66 @@
 import base64
 import os
 import os.path
+
+import logging
+from logging import handlers
+import sys
 from sendgrid.helpers.mail import (
     Mail, Attachment, FileContent, FileName,
     FileType, Disposition, ContentId)
 from sendgrid import SendGridAPIClient
+
+log = logging.getLogger('')
+log.setLevel(logging.INFO)
+fa_log_format = logging.Formatter("%(asctime)s [%(threadName)-12.12s] [%(levelname)-5.5s] : %(funcName)s: %(lineno)d: %(message)s")
+ch = logging.StreamHandler(sys.stdout)
+ch.setFormatter(fa_log_format)
+log.addHandler(ch)
+fh = handlers.RotatingFileHandler('sendgrid_mailer.log', maxBytes=(1048576*5), backupCount=7)
+fh.setFormatter(fa_log_format)
+log.addHandler(fh)
+
+
 data_files = ['email_body_txt_m4.txt',
               'Mailer_report.txt',
               'Ogre-raw-data-report.txt',
               'cleaned-sorted-df.csv',
               'pandas_df.csv',
-              'basic_price_stats.txt']
+              'basic_price_stats.txt',
+              '1_rooms_tmp.txt',
+              '1-4_rooms.png',
+              '1_rooms.png',
+              '2_rooms.png',
+              '3_rooms.png',
+              '4_rooms.png',
+              'test.png',
+              'mrv2.txt',
+              'Ogre_city_report.pdf']
 
 def remove_tmp_files() -> None:
     """FIXME: Refactor this function to better code"""
+    directory = os.getcwd()
+    log.info(f" --- current working dir {directory} --- ")
     for data_file in data_files:
         try:
+            log.info(f"Trying to delete file: {data_file}")
             os.remove(data_file)
         except OSError as e:
             print(f'Error: {data_file} : {e.strerror}')
+            log.error(f"Error deleting {data_file} : {e.strerror}")
 
 def sendgrid_mailer_main() -> None:
     """Main module entry point"""
-    print("Debug info: Starting sendgrid mailer module ...")
-    with open('email_body_txt_m4.txt') as f:
-        file_content = f.readlines()
+    log.info(" --- Started sendgrid_mailer module --- ")
+    log.info("Trying to open email_body_txt_m4.txt for email body content")
+    with open('email_body_txt_m4.txt') as file_object:
+        file_content = file_object.readlines()
+
+    log.info("Creating email body content from email_body_txt_m4.txt file")
     email_body_content = ''.join([i for i in file_content[1:]])
 
     # Creates Mail object instance
@@ -47,17 +80,19 @@ def sendgrid_mailer_main() -> None:
         plain_text_content=email_body_content)
 
     report_file_exists = os.path.exists('Ogre_city_report.pdf')
+    log.info("Checking if file Ogre_city_report.pdf exists and reading as binary")
     if report_file_exists:
-        # Binary read pdf file 
+        # Binary read pdf file
         file_path = 'Ogre_city_report.pdf'
-        with open(file_path, 'rb') as f:
-            data = f.read()
-            f.close()
+        with open(file_path, 'rb') as file_object:
+            data = file_object.read()
 
         # Encodes data with base64 for email attachment
         encoded_file = base64.b64encode(data).decode()
 
         # Creates instance of Attachment object
+        log.info("Attaching encoded Ogre_city_report.pdf to email object")
         attached_file = Attachment(
+            file_content=FileContent(encoded_file),
+            file_type=FileType('application/pdf'),
@@ -69,15 +104,19 @@ def sendgrid_mailer_main() -> None:
         message.attachment = attached_file
 
     try:
+        log.info("Attempting to send email via Sendgrid API")
         sendgrid_client = SendGridAPIClient(os.environ.get('SENDGRID_API_KEY'))
         response = sendgrid_client.send(message)
-        print("Email sent response code:", response.status_code)
-        print(response.body, response.headers)
+        log.info(f"Email sent with response code: {response.status_code}")
+        log.info(" --- Email response body --- ")
+        #log.info(f" {response.body} ")
+        log.info(" --- Email response headers --- ")
+        #log.info(f" {response.headers}")
     except Exception as e:
+        log.error(f"{e}")
         print(e.message)
-    print("Debug info: Removing temp files ... ")
     remove_tmp_files()
-    print("Debug info: Completed sendgrid mailer module... ")
+    log.info(" --- Ended sendgrid_mailer module --- ")
diff --git a/app/wsmodules/web_scraper.py b/app/wsmodules/web_scraper.py
index 94b8978..e112be1 100644
--- a/app/wsmodules/web_scraper.py
+++ b/app/wsmodules/web_scraper.py
@@ -18,7 +18,7 @@
 logger = logging.getLogger('web_scraper')
-logger.setLevel(logging.DEBUG)
+logger.setLevel(logging.INFO)
 ws_log_format = logging.Formatter(
     "%(asctime)s [%(threadName)-12.12s] [%(levelname)-5.5s] %(name)s : %(funcName)s: %(lineno)d: %(message)s")
diff --git a/sslv_web_scraper/calc.py b/sslv_web_scraper/calc.py
deleted file mode 100644
index 687053f..0000000
--- a/sslv_web_scraper/calc.py
+++ /dev/null
@@ -1,16 +0,0 @@
-""" This is fake module to run Travic CI pytest """
-
-def sum(a, b):
-    return a + b
-
-
-def sub(a, b):
-    return a - b
-
-
-def mult(a, b):
-    return a * b
-
-
-def div(a, b):
-    return a / b
diff --git a/sslv_web_scraper/test_calc.py b/sslv_web_scraper/test_calc.py
deleted file mode 100644
index 63863f9..0000000
--- a/sslv_web_scraper/test_calc.py
+++ /dev/null
@@ -1,25 +0,0 @@
-from sslv_web_scraper import calc
-
-
-def test_calc_addition():
-    """Verify the output of `calc_addition` function"""
-    output = calc.sum(2,4)
-    assert output == 6
-
-
-def test_calc_substraction():
-    """Verify the output of `calc_substraction` function"""
-    output = calc.sub(2, 4)
-    assert output == -2
-
-
-def test_calc_multiply():
-    """Verify the output of `calc_multiply` function"""
-    output = calc.mult(2,4)
-    assert output == 8
-
-
-def test_calc_division():
-    """Verify the output of `calc_multiply` function"""
-    output = calc.div(8,4)
-    assert output == 2
diff --git a/task_scheduler/ts.py b/task_scheduler/ts.py
index 981c79e..01ef087 100644
--- a/task_scheduler/ts.py
+++ b/task_scheduler/ts.py
@@ -11,7 +11,7 @@
 log = logging.getLogger('task_scheduler')
-log.setLevel(logging.DEBUG)
+log.setLevel(logging.INFO)
 ts_log_format = logging.Formatter(
     "%(asctime)s [%(threadName)-12.12s] [%(levelname)-5.5s] : %(funcName)s: %(lineno)d: %(message)s")
@@ -46,6 +46,6 @@ def execute_ogre_task():
 
 while True:
-    log.info('ts_loop: checking every 3000 sec if cheduled task needs to run again...')
+    log.info('ts_loop: checking every 8 hours if scheduled task needs to run again...')
     schedule.run_pending()
-    time.sleep(3000)
+    time.sleep(28800)
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_01_module_web_scraper.py b/tests/test_01_module_web_scraper.py
new file mode 100644
index 0000000..67524d1
--- /dev/null
+++ b/tests/test_01_module_web_scraper.py
@@ -0,0 +1,11 @@
+#from app.wsmodules.web_scraper import scrape_website
+
+
+#def test_web_scraper_module():
+#    """web_scraper module requires:
+#    1. internet connection to ss.lv
+#    2. generates output file Ogre-raw-data-report.txt that contains scraped data"""
+#    #output = calc.sum(2,4)
+#    #assert output == 6
+#    pass
diff --git a/tests/test_02_module_data_formater.py b/tests/test_02_module_data_formater.py
new file mode 100644
index 0000000..e964cf3
--- /dev/null
+++ b/tests/test_02_module_data_formater.py
@@ -0,0 +1,20 @@
+#from app.wsmodules.data_formater_v14 import get_file_path
+
+
+def test_create_file(tmpdir):
+    """Smoke test: the pytest tmpdir fixture can create and read back a file."""
+    p = tmpdir.mkdir("sub").join("hello.txt")
+    p.write("content")
+    assert p.read() == "content"
+    assert len(tmpdir.listdir()) == 1
+
+
+#def test_input_file_exists():
+#    """TODO"""
+#    pass
+
+
+#def test_output_file_exists():
+#    """TODO"""
+#    #output = calc.sum(2,4)
+#    #assert output == 6
+#    pass
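+
+
+# A possible next step (sketch, hypothetical test name): exercise the
+# date-stamped copy naming used by create_file_copy() in df_cleaner and
+# data_formater_v14. Importing those modules would run their main drivers
+# on import, so the naming scheme is reproduced inline here.
+def test_dated_copy_naming(tmpdir):
+    from datetime import datetime
+    todays_date = datetime.today().strftime('%Y-%m-%d')
+    src = tmpdir.join('cleaned-sorted-df.csv')
+    src.write('URL,Price_in_eur\n')
+    dest = tmpdir.join('cleaned-sorted-df-' + todays_date + '.csv')
+    dest.write(src.read())
+    assert dest.check()
+    assert dest.read() == src.read()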