From ece6b128aa80d78fcbd5be0b215c2bf28e9c0bfa Mon Sep 17 00:00:00 2001
From: John Matthews
Date: Mon, 22 Jul 2024 10:32:51 -0400
Subject: [PATCH] :sparkles: Allow logging to disk (#237)

* WIP logging updates

Signed-off-by: John Matthews

* Logging to disk via podman compose working

Signed-off-by: John Matthews

* Tweak to log levels

Signed-off-by: John Matthews

* Cleanup tweaks

Signed-off-by: John Matthews

* Ensure 'load-data' from podman compose up can log to disk

Signed-off-by: John Matthews

---------

Signed-off-by: John Matthews
---
 compose.yaml                                 |   7 +-
 example/run_demo.py                          |  13 +-
 kai/__init__.py                              | 119 -------------------
 kai/config.toml                              |   2 +
 kai/embedding_provider.py                    |   3 +-
 kai/hub_importer.py                          |   4 +-
 kai/kai_logging.py                           |  63 +++++++++-
 kai/llm_io_handler.py                        |   4 +-
 kai/models/file_solution.py                  |   3 +-
 kai/models/kai_config.py                     |   4 +
 kai/report.py                                |   3 +-
 kai/result.py                                |   4 +-
 kai/scm.py                                   |   3 +-
 kai/server.py                                |  39 +++---
 kai/service/incident_store/incident_store.py |   4 +-
 kai/service/incident_store/psql.py           |  17 +++
 samples/fetch_apps.py                        |   5 +-
 17 files changed, 138 insertions(+), 159 deletions(-)

diff --git a/compose.yaml b/compose.yaml
index 0d0d05a7..f5ebb527 100644
--- a/compose.yaml
+++ b/compose.yaml
@@ -4,7 +4,8 @@ x-common-variables: &kai-variables
   DEMO_MODE: "False"
   HUB_URL: ${HUB_URL:-https://tackle-konveyor-tackle.apps.example.com/hub}
   IMPORTER_ARGS:
-  LOGLEVEL: info
+  LOG_LEVEL: info
+  LOG_DIR: "/podman_compose/logs"
   NUM_WORKERS: 8
   USE_HUB_IMPORTER: ${USE_HUB_IMPORTER:-False}
 
@@ -27,7 +28,7 @@ services:
       OPENAI_API_KEY:
     image: ${IMAGE:-quay.io/konveyor/kai}:${TAG:-stable}
     volumes:
-      - ${PWD}:/podman_compose:ro,z
+      - ${PWD}:/podman_compose:rw,z
     ports:
       - "8080:8080"
     depends_on:
@@ -39,7 +40,7 @@
       MODE: importer
     image: ${IMAGE:-quay.io/konveyor/kai}:${TAG:-stable}
     volumes:
-      - ${PWD}:/podman_compose:ro,z
+      - ${PWD}:/podman_compose:rw,z
     depends_on:
       - kai_db
     profiles:
diff --git a/example/run_demo.py b/example/run_demo.py
index 59bf2c37..bf313643 100755
--- a/example/run_demo.py
+++ b/example/run_demo.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python
 
 import json
+import logging
 import os
 import sys
 import time
@@ -12,8 +13,10 @@
 
 # Ensure that we have 'kai' in our import path
 sys.path.append("../../kai")
-from kai import Report
-from kai.kai_logging import KAI_LOG
+from kai.kai_logging import formatter
+from kai.report import Report
+
+KAI_LOG = logging.getLogger(__name__)
 
 SERVER_URL = "http://0.0.0.0:8080"
 APP_NAME = "coolstore"
@@ -249,7 +252,11 @@ def run_demo(report):
 
 
 if __name__ == "__main__":
-    KAI_LOG.setLevel("info".upper())
+    console_handler = logging.StreamHandler()
+    console_handler.setFormatter(formatter)
+    KAI_LOG.addHandler(console_handler)
+    KAI_LOG.setLevel("DEBUG")
+
     start = time.time()
     coolstore_analysis_dir = "./analysis/coolstore/output.yaml"
     r = Report.load_report_from_file(coolstore_analysis_dir)
diff --git a/kai/__init__.py b/kai/__init__.py
index 650f3222..f102a9ca 100644
--- a/kai/__init__.py
+++ b/kai/__init__.py
@@ -1,120 +1 @@
 __version__ = "0.0.1"
-
-from typing import Annotated, List
-
-import typer
-
-from kai.report import Report
-from kai.result import LLMResult
-from kai.service.incident_store.incident_store import IncidentStore
-
-# from typing_extensions import Annotated
-
-
-app = typer.Typer()
-
-# report_app = typer.Typer()
-# result_app = typer.Typer()
-
-# app.add_typer(report_app, name="report", help="Generate a markdown report from a raw analysis yaml.")
-# app.add_typer(result_app, name="result", help="Generate patches for given violations and incidents from an analysis yaml")
-
-
-@app.command()
-def report(analysis_path: str, output_dir: str):
-    """
-    Generate a Markdown report of a given analysis
-    YAML to be read by a human
-    """
-    report = Report.load_report_from_file(analysis_path)
-    r = dict(report)
-    print(f"We have results from {len(r.keys())} RuleSet(s) in {analysis_path}\n")
-    report.write_markdown(output_dir)
-
-
-@app.command()
-def generate(
-    path_to_report: str,
-    path_to_source: str,
-    example_initial_branch: str,
-    example_solved_branch: str,
-    path_to_output: str,
-    limit_rulesets: Annotated[List[str], typer.Option("--ruleset", "-r")] = None,
-    limit_violations: Annotated[List[str], typer.Option("--violation", "-v")] = None,
-    model: Annotated[str, typer.Option("--model", "-m")] = "gpt-3.5-turbo-16k",
-):
-    # model: Annotated[Optional[str], typer.Argument()] = "gpt-3.5-turbo-16k"):
-    """
-    Generate patches for given violations and incidents from an analysis yaml report
-
-    - path_to_report: Path to the analysis yaml report
-
-    - path_to_source: Path to the source code to be patched
-
-    - example_initial_branch: Branch name for the initial state of the source code
-
-    - example_solved_branch: Branch name for the solved state of the source code
-
-    - path_to_output: Path to the output directory for the patches
-
-    - limit_rulesets: Limit to specific rulesets (defaults to 'None', meaning to run all)
-
-    - limit_violations: Limit to specific violations (defaults to 'None', meaning to run all)
-
-    - model: Model name to use for generating the patches (defaults to 'gpt-3.5-turbo-16k', 'gpt-4-1106-preview' is another good option)
-    """
-    print(
-        f"Generating patches for {path_to_report} for example app at {path_to_source}"
-    )
-    print(f"Initial branch: {example_initial_branch}")
-    print(f"Solved branch: {example_solved_branch}")
-    print(f"Output directory: {path_to_output}")
-    print(f"Model: {model}")
-    print(f"Limit to ruleset(s): {limit_rulesets}")
-    print(f"Limit to violation(s): {limit_violations}")
-
-    llmResult = LLMResult(path_to_source, example_initial_branch, example_solved_branch)
-    llmResult.parse_report(path_to_report)
-    llmResult.process(path_to_output, model, limit_rulesets, limit_violations)
-    print(f"Completed processing, output written to {path_to_output}\n")
-
-
-@app.command()
-def load(report_path: str, output_dir: str):
-    """
-    Load the incident store with the given applications
-    write the cached_violations to a file for later use
-    """
-    incident_store = IncidentStore(report_path, output_dir)
-    incident_store.load_incident_store()
-
-
-@app.command()
-def patch(ruleset: str, violation: str, report_path: str, output_dir: str):
-    """
-    Generate patches for a specific violation
-    """
-    print(f"Generating patches for {ruleset} - {violation}")
-    incident_store = IncidentStore(report_path, output_dir)
-    patches = incident_store.get_solved_issue(ruleset, violation)
-    if len(patches) == 0:
-        print(f"No patches found for {ruleset} - {violation}")
-    else:
-        for patch in patches:
-            print(f"Patch: {patch}")
-    return patches
-
-
-@app.command()
-def common(ruleset: str, violation: str, report_path: str, output_dir: str):
-    """
-    Find common violations for a specific violation
-    """
-    print(f"Finding common violations for {ruleset} - {violation}")
-    incident_store = IncidentStore(report_path, output_dir)
-    violations = incident_store.find_common_violations(ruleset, violation)
-    if violations is None:
-        print(f"No common violations found for {ruleset} - {violation}")
-    for violation in violations:
-        print(f"Violation: {violation}")
-    return violations
diff --git a/kai/config.toml b/kai/config.toml
index f507822e..16227cf3 100644
--- a/kai/config.toml
+++ b/kai/config.toml
@@ -1,4 +1,6 @@
 log_level = "info"
+file_log_level = "debug"
+log_dir = "$pwd/logs"
 demo_mode = false
 trace_enabled = true
 
diff --git a/kai/embedding_provider.py b/kai/embedding_provider.py
index 9c424d26..44b9d915 100644
--- a/kai/embedding_provider.py
+++ b/kai/embedding_provider.py
@@ -1,3 +1,4 @@
+import logging
 import random
 from abc import ABC, abstractmethod
 from enum import Enum
@@ -9,7 +10,7 @@
 from psycopg2 import sql
 from psycopg2.extensions import connection
 
-from kai.kai_logging import KAI_LOG
+KAI_LOG = logging.getLogger(__name__)
 
 
 class TrimStrategy(Enum):
diff --git a/kai/hub_importer.py b/kai/hub_importer.py
index 927dcce1..288aeb74 100755
--- a/kai/hub_importer.py
+++ b/kai/hub_importer.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python
 
 import argparse
+import logging
 import os
 import pprint
 import tempfile
@@ -13,11 +14,12 @@
 from git import GitCommandError, Repo
 from pydantic import BaseModel, Field
 
-from kai.kai_logging import KAI_LOG
 from kai.models.kai_config import KaiConfig
 from kai.report import Report
 from kai.service.incident_store import Application, IncidentStore
 
+KAI_LOG = logging.getLogger(__name__)
+
 
 # BaseModel that also acts as a dict
 class KaiBaseModel(BaseModel):
diff --git a/kai/kai_logging.py b/kai/kai_logging.py
index 9a8ca961..9ebfa131 100644
--- a/kai/kai_logging.py
+++ b/kai/kai_logging.py
@@ -1,10 +1,63 @@
 import logging
+import os
 
-KAI_LOG = logging.getLogger(__name__)
-console_handler = logging.StreamHandler()
+from kai.models.kai_config import KaiConfig
+
+parent_log = logging.getLogger("kai")
+
+# console_handler = logging.StreamHandler()
 # formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
 formatter = logging.Formatter(
-    "%(levelname)s - %(asctime)s - [%(filename)20s:%(lineno)-4s - %(funcName)20s()] - %(message)s"
+    "%(levelname)s - %(asctime)s - %(name)s - [%(filename)20s:%(lineno)-4s - %(funcName)20s()] - %(message)s"
 )
-console_handler.setFormatter(formatter)
-KAI_LOG.addHandler(console_handler)
+
+
+def process_log_dir_replacements(log_dir: str):
+    ##
+    # We want to replace $pwd with the location of the Kai project directory;
+    # this is needed to help with specifying the log directory from configuration
+    ##
+    if log_dir.startswith("$pwd"):
+        log_dir = log_dir.replace(
+            "$pwd", os.path.join(os.path.dirname(os.path.realpath(__file__)), "../")
+        )
+    return log_dir
+
+
+def setup_console_handler(logger, log_level: str = "INFO"):
+    console_handler = logging.StreamHandler()
+    console_handler.setLevel(log_level)
+    console_handler.setFormatter(formatter)
+    logger.addHandler(console_handler)
+    print(f"Console logging for '{logger.name}' is set to level '{log_level}'")
+
+
+def setup_file_handler(
+    logger, log_file_name: str, log_dir: str, log_level: str = "DEBUG"
+):
+    # Ensure any needed log directories exist
+    log_dir = process_log_dir_replacements(log_dir)
+    log_file_path = os.path.join(log_dir, log_file_name)
+    if log_dir.startswith("$pwd"):
+        log_dir = os.path.join(os.getcwd(), log_dir[5:])
+    os.makedirs(os.path.dirname(log_file_path), exist_ok=True)
+
+    file_handler = logging.FileHandler(log_file_path)
+    file_handler.setLevel(log_level)
+    file_handler.setFormatter(formatter)
+    logger.addHandler(file_handler)
+    print(
+        f"File logging for '{logger.name}' is set to level '{log_level}' writing to file: '{log_file_path}'"
+    )
+
+
+def initLogging(console_log_level, file_log_level, log_dir, log_file="kai_server.log"):
+    setup_console_handler(parent_log, console_log_level)
+    setup_file_handler(parent_log, log_file, log_dir, file_log_level)
+    # Attempt to set the parent log level to the most permissive
+    # and allow child loggers to control what is filtered or not
+    parent_log.setLevel("DEBUG")
+
+
+def initLoggingFromConfig(config: KaiConfig):
+    initLogging(config.log_level.upper(), config.file_log_level.upper(), config.log_dir)
diff --git a/kai/llm_io_handler.py b/kai/llm_io_handler.py
index f6b4d824..12e06ef3 100644
--- a/kai/llm_io_handler.py
+++ b/kai/llm_io_handler.py
@@ -1,4 +1,5 @@
 import itertools
+import logging
 import os
 import time
 import traceback
@@ -16,7 +17,6 @@
 )
 
 from kai.constants import PATH_TEMPLATES
-from kai.kai_logging import KAI_LOG
 from kai.model_provider import ModelProvider
 from kai.models.file_solution import guess_language, parse_file_solution_content
 from kai.service.incident_store.incident_store import IncidentStore
@@ -25,6 +25,8 @@
 LLM_RETRIES = 5
 LLM_RETRY_DELAY = 10
 
+KAI_LOG = logging.getLogger(__name__)
+
 
 def get_prompt(
     model_provider: ModelProvider,
diff --git a/kai/models/file_solution.py b/kai/models/file_solution.py
index 51b5c867..3e27e089 100644
--- a/kai/models/file_solution.py
+++ b/kai/models/file_solution.py
@@ -1,10 +1,11 @@
+import logging
 import re
 
 from pydantic import BaseModel
 from pygments import lexers
 from pygments.util import ClassNotFound
 
-from kai.kai_logging import KAI_LOG
+KAI_LOG = logging.getLogger(__name__)
 
 
 class FileSolutionContent(BaseModel):
diff --git a/kai/models/kai_config.py b/kai/models/kai_config.py
index ab606842..a6166e36 100644
--- a/kai/models/kai_config.py
+++ b/kai/models/kai_config.py
@@ -92,6 +92,10 @@ class KaiConfigModels(BaseModel):
 
 class KaiConfig(BaseModel):
     log_level: str = "info"
+    file_log_level: str = "info"
+    log_dir: str = os.path.join(
+        os.path.dirname(os.path.realpath(__file__)), "../../logs"
+    )
     demo_mode: bool = False
     trace_enabled: bool = False
 
diff --git a/kai/report.py b/kai/report.py
index 7383b5cd..9ee01f8d 100644
--- a/kai/report.py
+++ b/kai/report.py
@@ -2,13 +2,14 @@
 
 import hashlib
 import json
+import logging
 import os
 import shutil
 from io import StringIO
 
 import yaml
 
-from kai.kai_logging import KAI_LOG
+KAI_LOG = logging.getLogger(__name__)
 
 
 class Report:
diff --git a/kai/result.py b/kai/result.py
index a1bd792e..a2547e01 100644
--- a/kai/result.py
+++ b/kai/result.py
@@ -1,5 +1,6 @@
 __all__ = ["LLMResult"]
 
+import logging
 import os
 
 from langchain.chains import LLMChain
@@ -7,11 +8,12 @@
 from langchain_community.chat_models import ChatOpenAI
 
 from kai.constants import PATH_TEMPLATES
-from kai.kai_logging import KAI_LOG
 
 from .report import Report
 from .scm import GitDiff
 
+KAI_LOG = logging.getLogger(__name__)
+
 
 class LLMResult:
     """The intent of this class is to help us form several Prompt examples
     using a single application
diff --git a/kai/scm.py b/kai/scm.py
index fbd7d7c2..5921c365 100644
--- a/kai/scm.py
+++ b/kai/scm.py
@@ -1,9 +1,10 @@
 __all__ = ["GitDiff"]
 
+import logging
 
 from git import BadName, Repo
 
-from kai.kai_logging import KAI_LOG
+KAI_LOG = logging.getLogger(__name__)
 
 
 class GitDiff:
diff --git a/kai/server.py b/kai/server.py
index d5db510f..6dc3d846 100644
--- a/kai/server.py
+++ b/kai/server.py
@@ -5,6 +5,7 @@
 import argparse
 import datetime
 import json
+import logging
 import os
 import pprint
 import time
@@ -19,13 +20,15 @@
 
 from kai import llm_io_handler
 from kai.constants import PATH_KAI
-from kai.kai_logging import KAI_LOG
+from kai.kai_logging import initLoggingFromConfig
 from kai.model_provider import ModelProvider
 from kai.models.analyzer_types import Incident
 from kai.models.kai_config import KaiConfig
 from kai.report import Report
 from kai.service.incident_store.incident_store import Application, IncidentStore
 
+log = logging.getLogger(__name__)
+
 
 # TODO: Make openapi spec for everything
 
 # TODO: Repo lives both on client and on server. Determine either A) Best way to
@@ -45,7 +48,7 @@
 
 @routes.post("/health_check")
 async def post_dummy_json_request(request: Request):
-    KAI_LOG.debug(f"health_check recv'd: {request}")
+    log.debug(f"health_check recv'd: {request}")
 
     request_json: dict = await request.json()
 
@@ -112,7 +115,7 @@ async def post_get_incident_solution(request: Request):
     Stateful, stores it
     """
 
-    KAI_LOG.debug(f"post_get_incident_solution recv'd: {request}")
+    log.debug(f"post_get_incident_solution recv'd: {request}")
 
     params = PostGetIncidentSolutionParams.model_validate(await request.json())
 
@@ -210,10 +213,10 @@ class PostGetIncidentSolutionsForFileParams(BaseModel):
 @routes.post("/get_incident_solutions_for_file")
 async def get_incident_solutions_for_file(request: Request):
     start = time.time()
-    KAI_LOG.debug(f"get_incident_solutions_for_file recv'd: {request}")
+    log.debug(f"get_incident_solutions_for_file recv'd: {request}")
     params = PostGetIncidentSolutionsForFileParams.model_validate(await request.json())
 
-    KAI_LOG.info(
+    log.info(
         f"START - App: '{params.application_name}', File: '{params.file_name}' with {len(params.incidents)} incidents'"
     )
 
@@ -247,7 +250,7 @@ async def get_incident_solutions_for_file(request: Request):
     finally:
         end = time.time()
         trace.end(end)
-        KAI_LOG.info(
+        log.info(
             f"END - completed in '{end-start}s: - App: '{params.application_name}', File: '{params.file_name}' with {len(params.incidents)} incidents'"
         )
 
@@ -271,38 +274,38 @@ def app():
     else:
         raise FileNotFoundError("Config file not found.")
 
-    if os.getenv("LOGLEVEL") is not None:
-        config.log_level = os.getenv("LOGLEVEL").upper()
+    if os.getenv("LOG_LEVEL") is not None:
+        config.log_level = os.getenv("LOG_LEVEL").upper()
     if os.getenv("DEMO_MODE") is not None:
         config.demo_mode = os.getenv("DEMO_MODE").lower() == "true"
     if os.getenv("TRACE") is not None:
         config.trace_enabled = os.getenv("TRACE").lower() == "true"
+    if os.getenv("LOG_DIR") is not None:
+        config.log_dir = os.getenv("LOG_DIR")
 
     print(f"Config loaded: {pprint.pformat(config)}")
-
-    config: KaiConfig
     webapp["config"] = config
+    initLoggingFromConfig(config)
 
-    KAI_LOG.setLevel(config.log_level.upper())
     print(
         f"Logging for KAI has been initialized and the level set to {config.log_level.upper()}"
     )
 
-    KAI_LOG.info(
+    log.info(
         f"Tracing of actions is {'enabled' if config.trace_enabled else 'disabled'}"
     )
 
     if config.demo_mode:
-        KAI_LOG.info("DEMO_MODE is enabled. LLM responses will be cached")
+        log.info("DEMO_MODE is enabled. LLM responses will be cached")
 
     webapp["model_provider"] = ModelProvider(config.models)
-    KAI_LOG.info(f"Selected provider: {config.models.provider}")
-    KAI_LOG.info(f"Selected model: {webapp['model_provider'].model_id}")
+    log.info(f"Selected provider: {config.models.provider}")
+    log.info(f"Selected model: {webapp['model_provider'].model_id}")
 
     webapp["incident_store"] = IncidentStore.from_config(
         config.incident_store, webapp["model_provider"]
     )
-    KAI_LOG.info(f"Selected incident store: {config.incident_store.provider}")
+    log.info(f"Selected incident store: {config.incident_store.provider}")
 
     webapp.add_routes(routes)
 
@@ -315,7 +318,7 @@ def main():
     arg_parser.add_argument(
         "-log",
        "--loglevel",
-        default=os.getenv("KAI_LOG_LEVEL", "info"),
+        default=os.getenv("LOG_LEVEL", "info"),
         choices=["debug", "info", "warning", "error", "critical"],
         help="""Provide logging level.
 Options:
@@ -336,7 +339,7 @@
 
     args, _ = arg_parser.parse_known_args()
 
-    os.environ["LOGLEVEL"] = str(args.loglevel)
+    os.environ["LOG_LEVEL"] = str(args.loglevel)
     os.environ["DEMO_MODE"] = str(args.demo_mode).lower()
 
     web.run_app(app())
diff --git a/kai/service/incident_store/incident_store.py b/kai/service/incident_store/incident_store.py
index 66de755e..3c0e0ed5 100644
--- a/kai/service/incident_store/incident_store.py
+++ b/kai/service/incident_store/incident_store.py
@@ -1,6 +1,7 @@
 import argparse
 import datetime
 import enum
+import logging
 import os
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
@@ -24,7 +25,6 @@
 from sqlalchemy.types import JSON
 
 from kai.constants import PATH_LOCAL_REPO
-from kai.kai_logging import KAI_LOG
 from kai.model_provider import ModelProvider
 from kai.models.kai_config import (
     KaiConfig,
@@ -33,6 +33,8 @@
 )
 from kai.report import Report
 
+KAI_LOG = logging.getLogger(__name__)
+
 # These prefixes are sometimes in front of the paths, strip them.
 # Also strip leading slashes since os.path.join can't join two absolute paths
 KNOWN_PREFIXES = (
diff --git a/kai/service/incident_store/psql.py b/kai/service/incident_store/psql.py
index 5744d693..e3ca07ea 100644
--- a/kai/service/incident_store/psql.py
+++ b/kai/service/incident_store/psql.py
@@ -1,5 +1,8 @@
+import os
+
 from sqlalchemy import and_, create_engine
 
+from kai.kai_logging import initLogging
 from kai.model_provider import ModelProvider
 from kai.models.kai_config import KaiConfigIncidentStorePostgreSQLArgs
 from kai.service.incident_store.incident_store import IncidentStore, SQLIncident, cmd
@@ -27,4 +30,18 @@ def json_exactly_equal(self, json_dict: dict):
 
 
 if __name__ == "__main__":
+    if os.getenv("LOG_DIR") is not None:
+        log_dir = os.getenv("LOG_DIR")
+    else:
+        log_dir = os.path.join(
+            os.path.dirname(os.path.realpath(__file__)), "../../../logs"
+        )
+
+    if os.getenv("LOG_LEVEL") is not None:
+        log_level = os.getenv("LOG_LEVEL").upper()
+    else:
+        log_level = "INFO"
+
+    initLogging(log_level, "DEBUG", log_dir, "kai_psql.log")
+
     cmd("postgresql")
diff --git a/samples/fetch_apps.py b/samples/fetch_apps.py
index 68e5bd8d..4df3ab92 100755
--- a/samples/fetch_apps.py
+++ b/samples/fetch_apps.py
@@ -12,9 +12,8 @@ def fetch_sample_apps():
     for repo in repos:
         print(f"Cloning {repo}...")
         if os.path.exists(f"sample_repos/{repo}"):
-            print(f"*** Exiting since 'sample_repos/{repo}' already exists")
-            print("*** Delete 'sample_repos' and rerun this script")
-            sys.exit(1)
+            print(f"*** Skipping since 'sample_repos/{repo}' already exists")
+            continue
         gitCloneStatus = subprocess.run(
             # trunk-ignore(bandit)
             ["git", "clone", repos[repo][0], f"sample_repos/{repo}"]
         )
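

A minimal, self-contained sketch of the logger/handler layout this patch wires
up through initLogging() and initLoggingFromConfig(): a "kai" parent logger set
to the most permissive level, with per-handler filtering for console and file
output. It is illustrative only; the child logger name "kai.demo" and the local
"logs" directory stand in for a real module name and the configured log_dir.

import logging
import os

# Same format string the patch adds in kai/kai_logging.py (now with %(name)s).
formatter = logging.Formatter(
    "%(levelname)s - %(asctime)s - %(name)s - "
    "[%(filename)20s:%(lineno)-4s - %(funcName)20s()] - %(message)s"
)

# Parent logger at DEBUG: the handlers, not the logger, decide what is filtered.
parent_log = logging.getLogger("kai")
parent_log.setLevel(logging.DEBUG)

# Console handler at INFO, mirroring log_level = "info" in config.toml.
console_handler = logging.StreamHandler()
console_handler.setLevel(logging.INFO)
console_handler.setFormatter(formatter)
parent_log.addHandler(console_handler)

# File handler at DEBUG, mirroring file_log_level = "debug"; "logs" here
# stands in for the configured log_dir after $pwd replacement.
os.makedirs("logs", exist_ok=True)
file_handler = logging.FileHandler(os.path.join("logs", "kai_server.log"))
file_handler.setLevel(logging.DEBUG)
file_handler.setFormatter(formatter)
parent_log.addHandler(file_handler)

# Modules then call logging.getLogger(__name__), exactly as the patch changes
# kai/server.py and friends; child loggers under "kai" inherit both handlers.
log = logging.getLogger("kai.demo")
log.info("reaches the console and logs/kai_server.log")
log.debug("reaches only logs/kai_server.log")

This is why the patch replaces every `from kai.kai_logging import KAI_LOG` with
a module-local `logging.getLogger(__name__)`: handler setup happens once at
startup, and each module's records roll up to the "kai" parent by name.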