diff --git a/.gitignore b/.gitignore index bc948d62..371d1fa5 100644 --- a/.gitignore +++ b/.gitignore @@ -39,6 +39,7 @@ MANIFEST # before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec +!build.spec # Installer logs pip-log.txt diff --git a/kai/constants.py b/kai/constants.py index 26b4661d..255cfaae 100644 --- a/kai/constants.py +++ b/kai/constants.py @@ -1,5 +1,6 @@ import os import pathlib +import sys """ This file exists because we need to define some constants - specifically file @@ -9,6 +10,10 @@ PATH_KAI = os.path.dirname(os.path.abspath(__file__)) +# pyinstaller sets sys attributes to help determine when program runs in bin +if getattr(sys, "frozen", False) and hasattr(sys, "_MEIPASS"): + PATH_KAI = sys._MEIPASS + PATH_GIT_ROOT = os.path.join(PATH_KAI, "..") PATH_DATA = os.path.join(PATH_KAI, "data") diff --git a/kai/kai_logging.py b/kai/kai_logging.py index 2b79d0ea..2b6bcadd 100644 --- a/kai/kai_logging.py +++ b/kai/kai_logging.py @@ -33,7 +33,11 @@ def setup_console_handler(logger, log_level: str = "INFO"): def setup_file_handler( - logger, log_file_name: str, log_dir: str, log_level: str = "DEBUG" + logger, + log_file_name: str, + log_dir: str, + log_level: str = "DEBUG", + silent: bool = False, ): # Ensure any needed log directories exist log_dir = process_log_dir_replacements(log_dir) @@ -46,9 +50,10 @@ def setup_file_handler( file_handler.setLevel(log_level) file_handler.setFormatter(formatter) logger.addHandler(file_handler) - print( - f"File logging for '{logger.name}' is set to level '{log_level}' writing to file: '{log_file_path}'" - ) + if not silent: + print( + f"File logging for '{logger.name}' is set to level '{log_level}' writing to file: '{log_file_path}'" + ) def initLogging(console_log_level, file_log_level, log_dir, log_file="kai_server.log"): diff --git a/playpen/.gitignore b/playpen/.gitignore new file mode 100644 index 00000000..234ebe15 --- /dev/null +++ b/playpen/.gitignore @@ -0,0 +1,4 @@ 
+build/** +node_modules/** +package.json +package-lock.json \ No newline at end of file diff --git a/playpen/README.md b/playpen/README.md index 2b69dbcb..9275695a 100644 --- a/playpen/README.md +++ b/playpen/README.md @@ -1,3 +1,86 @@ # Playpen Playpen is intended to be a location for exploring and sharing concepts. The material created under this directory may be broken and contain approaches that end up not being useful, or the material here may help to rapidly try out a concept that ends up being incorporated into Kai. + +## PyInstaller and JSON-RPC Client + +The goals of this effort are: + +- Figure out how we can enable communication between the Kai Client and different IDE extensions (possibly running in restricted environments) in a uniform way. +- Figure out a way to package the client into an independent binary that can be run on different platforms. + +As of writing this, here's the progress we made on the goals above: + +- We have a JSON-RPC interface in front of the Client CLI. The JSON-RPC interface can be found in [./client/rpc.py](./client/rpc.py). It exposes `get_incident_solutions_for_file` function that generates a fix for one file. There are two example clients (Python and Javascript) we have written that talk with the interface over I/O streams. +- We have a `build.spec` file that builds the JSON-RPC client into a binary using PyInstaller. + +### Building JSON-RPC interface into a binary + +Before you can build the binary, you need to activate Kai virtual environment. Once venv is activated, you need to install Kai module in the env. To install Kai module, navigate to the Kai project root and run: + +```sh +pip install -e . +``` + +Now we install pyinstaller in current venv: + +```sh +pip install pyinstaller +``` + +Next, we run pyinstaller to generate a binary: + +```sh +pyinstaller build.spec +``` + +Once successful, a binary will be generated at `./dist/cli`. 
+ +### Testing JSON-RPC binary + +Now that we have built our JSON-RPC interface into a binary, we will test it using a Python client and a JS client that communicate with it over its I/O streams. Both of these clients use a hardcoded path `./dist/cli` to run the JSON-RPC server. Make sure you have built the binary before moving forward. + +#### Testing with Python client + +To run the Python JSON-RPC client, install a dependency: + +```sh +pip install pylspclient +``` + +To run the client: + +```sh +python rpc-client.py <KAI_TOML_CONFIG> <APP_NAME> <ANALYSIS_OUTPUT_PATH> <INPUT_FILE_PATH> +``` + +See [arguments](#client-arguments) for help on the arguments above. + +#### Testing with JS client + +To run the JavaScript client, install a dependency: + +```sh +npm install vscode-jsonrpc +``` + +To run the client: + +```sh +node rpc-client.js <KAI_TOML_CONFIG> <APP_NAME> <ANALYSIS_OUTPUT_PATH> <INPUT_FILE_PATH> +``` + +##### Client arguments + +Both the Python and JS clients take exactly the same arguments, in this order: + +- `<KAI_TOML_CONFIG>`: Absolute path to the Kai config you want to use to generate the fix +- `<APP_NAME>`: The name of the application you're analyzing +- `<ANALYSIS_OUTPUT_PATH>`: Absolute path to an analysis report containing incidents +- `<INPUT_FILE_PATH>`: Absolute path to the input file for which you want to generate a fix + +When successful, both clients will print the updated file followed by the following message: + +```sh +Received response successfully! 
# -*- mode: python ; coding: utf-8 -*-

# This is a PyInstaller build spec to build Kai Client into a binary
# To run this spec, activate Kai venv and run `pyinstaller ./build.spec`

import sys
import os
from PyInstaller.building.datastruct import Tree
from PyInstaller.building.build_main import Analysis
from PyInstaller.building.api import PYZ, EXE, COLLECT
from PyInstaller.utils.hooks import collect_data_files

# (source path, destination inside the bundle) pairs handed to Analysis(datas=...).
# The destination mirrors the "data" directory that kai/constants.py resolves
# relative to sys._MEIPASS when running frozen.
data_dirs = [
    ('../kai/data/templates', 'data/templates'),
]

# Entry-point script of the JSON-RPC server; its directory is also put on the
# analysis search path (pathex) below.
script_path = 'client/rpc.py'

a = Analysis(
    [script_path],
    pathex=[os.path.dirname(script_path)],
    binaries=[],
    datas=data_dirs,
    # Explicitly requested module; presumably not picked up by static import
    # analysis -- TODO confirm it is still required.
    hiddenimports=["_ssl"],
    hookspath=[],
    runtime_hooks=[],
    excludes=[],
    win_no_prefer_redirects=False,
    win_private_assemblies=False,
    # cipher=None,
    noarchive=False,
)

# Archive of the pure-Python modules collected by the analysis.
pyz = PYZ(a.pure, a.zipped_data)

# Scripts, binaries and data files are all passed directly to EXE and no
# COLLECT step follows, so the output is a single executable named "cli"
# (the README expects it at ./dist/cli).
exe = EXE(
    pyz,
    a.scripts,
    a.binaries,
    a.datas,
    [],
    name="cli",
    debug=False,
    bootloader_ignore_signals=False,
    strip=False,
    upx=True,
    upx_exclude=[],
    runtime_tmpdir=None,
    # Console application: the server talks JSON-RPC over stdin/stdout.
    console=True,
    disable_windowed_traceback=False,
    argv_emulation=False,
    target_arch=None,
    codesign_identity=None,
    entitlements_file=None,
)
log = logging.getLogger("kai-rpc")

# Wire format of one framed message: a Content-Length header, a blank line,
# then the JSON payload.
JSON_RPC_REQ_FORMAT = "Content-Length: {json_string_len}\r\n\r\n{json_string}"
LEN_HEADER = "Content-Length: "
TYPE_HEADER = "Content-Type: "


class CustomRpcServer(RpcServer):
    """Endpoint loop that answers incoming JSON-RPC calls.

    The base class is written as a client; this subclass overrides ``run`` so
    that incoming requests are dispatched to ``method_callbacks`` with the
    keyword arguments found under ``params["kwargs"]``.
    """

    def run(self):
        """Read messages until shutdown or end of stream and dispatch them.

        A ``ResponseError`` raised while reading or handling a message is sent
        back to the peer as an error response instead of ending the loop.
        """
        while not self.shutdown_flag:
            # Reset for every message: the handler below must never see an
            # undefined id (read failed before parsing) or a stale one left
            # over from the previous message.
            rpc_id = None
            try:
                jsonrpc_message = self.json_rpc_endpoint.recv_response()
                if jsonrpc_message is None:
                    log.debug("server quit")
                    break
                method = jsonrpc_message.get("method")
                result = jsonrpc_message.get("result")
                error = jsonrpc_message.get("error")
                rpc_id = jsonrpc_message.get("id")
                params = jsonrpc_message.get("params")

                if not method:
                    # No method: this is a response to a request we sent.
                    self.handle_result(rpc_id, result, error)
                elif rpc_id is not None:
                    # Method plus id: a call that expects a response.
                    self._dispatch_call(rpc_id, method, params)
                elif method in self.notify_callbacks:
                    # Method without id: a notification.
                    self.notify_callbacks[method](params)
                else:
                    log.debug(f"Notify method not found: {method}.")
            except ResponseError as e:
                self.send_response(rpc_id, None, e)

    def _dispatch_call(self, rpc_id, method, params):
        """Invoke the callback registered for ``method`` and send its result.

        Raises:
            ResponseError: ``MethodNotFound`` for an unregistered method,
                ``InvalidParams`` when ``params`` carries no ``kwargs`` object.
        """
        if method not in self.method_callbacks:
            raise ResponseError(
                ErrorCodes.MethodNotFound,
                f"Method not found: {method}",
            )
        kwargs = params.get("kwargs") if isinstance(params, dict) else None
        if not isinstance(kwargs, dict):
            # Previously this surfaced as a TypeError/KeyError that ended the
            # server loop; report it to the caller instead.
            raise ResponseError(
                ErrorCodes.InvalidParams,
                "params.kwargs must be an object of keyword arguments",
            )
        result = self.method_callbacks[method](**kwargs)
        self.send_response(rpc_id, result, None)


class CustomRpcEndpoint(JsonRpcEndpoint):
    """Framed JSON-RPC reader/writer over a pair of text streams.

    NOTE(review): the payload length is counted in characters because both
    streams are used in text mode here. Peers that count bytes agree only for
    ASCII payloads -- confirm before sending non-ASCII content from other
    clients.
    """

    def __add_header(self, json_string: str):
        """Return ``json_string`` prefixed with its Content-Length header."""
        return JSON_RPC_REQ_FORMAT.format(
            json_string_len=len(json_string), json_string=json_string
        )

    def send_request(self, message):
        """Serialize ``message`` and write it, framed, to the outgoing stream."""
        json_string = json.dumps(message, cls=MyEncoder)
        jsonrpc_req = self.__add_header(json_string)
        log.debug(f"Sending data over stdin {jsonrpc_req}")
        with self.write_lock:
            self.stdin.write(jsonrpc_req)
            self.stdin.flush()

    def recv_response(self):
        """Read one framed message and return it decoded.

        Returns:
            The decoded JSON value, or ``None`` when the stream is exhausted.

        Raises:
            ResponseError: ``ParseError`` for a malformed header or body.
        """
        with self.read_lock:
            message_size = None
            while True:
                line = self.stdout.readline()
                if not line:
                    return None
                if not line.endswith("\n"):
                    raise ResponseError(
                        ErrorCodes.ParseError, "Bad header: missing newline"
                    )
                # Drop exactly one line terminator ("\r\n" or "\n").
                line = line[: -2 if line.endswith("\r\n") else -1]
                if line == "":
                    # Blank line ends the header section.
                    break
                if line.startswith(LEN_HEADER):
                    size_text = line[len(LEN_HEADER) :]
                    if not size_text.isdigit():
                        raise ResponseError(
                            ErrorCodes.ParseError, "Bad header: size is not int"
                        )
                    message_size = int(size_text)
                elif line.startswith(TYPE_HEADER):
                    # Content-Type is accepted but not interpreted.
                    pass
                else:
                    raise ResponseError(
                        ErrorCodes.ParseError, "Bad header: unknown header"
                    )
            if not message_size:
                raise ResponseError(ErrorCodes.ParseError, "Bad header: missing size")

            jsonrpc_res = self.stdout.read(message_size)
            log.debug(f"Read data from stdout {jsonrpc_res}")
            try:
                return json.loads(jsonrpc_res)
            except json.JSONDecodeError as err:
                # Keep a malformed body from terminating the serving loop.
                raise ResponseError(
                    ErrorCodes.ParseError, f"Bad body: invalid JSON ({err})"
                ) from err
class RPCParams:
    """Typed view over the keyword arguments of an RPC request.

    Values are stored as given in ``__init__`` (no validation happens there);
    the path setters validate that the target exists and is a regular file.
    String getters return ``None`` when the key was not supplied, except
    ``log_level`` which defaults to ``"INFO"``.
    """

    def __init__(self, **kwargs):
        self._data = kwargs

    def __repr__(self) -> str:
        # Makes debug logging of the request show its contents.
        return f"{type(self).__name__}({self._data!r})"

    def json(self):
        """Return the raw parameters serialized as a JSON string."""
        return json.dumps(self._data)

    @property
    def data(self) -> Dict[str, Any]:
        """The underlying parameter dictionary."""
        return self._data

    @data.setter
    def data(self, data) -> None:
        self._data = data

    @property
    def app_name(self) -> str:
        """Name of the application being analyzed."""
        return self._data.get("app_name")

    @app_name.setter
    def app_name(self, name: str) -> None:
        self._data["app_name"] = name

    @property
    def config_path(self) -> str:
        """Path to the Kai configuration file."""
        return self._data.get("config_path")

    @config_path.setter
    def config_path(self, path: str) -> None:
        self._validate_path(path)
        self._data["config_path"] = path

    @property
    def input_file_path(self) -> str:
        """Path to the source file a fix is requested for."""
        return self._data.get("input_file_path")

    @input_file_path.setter
    def input_file_path(self, path: str) -> None:
        self._validate_path(path)
        self._data["input_file_path"] = path

    @property
    def incidents(self) -> List["ExtendedIncident"]:
        """Incidents supplied with the request; empty list when absent."""
        return self._data.get("incidents", [])

    @incidents.setter
    def incidents(self, incidents: List["ExtendedIncident"]) -> None:
        self._data["incidents"] = incidents

    @property
    def report_path(self) -> str:
        """Path to the analysis report, or ``None`` when not supplied."""
        # Was defaulting to a list; a missing path is now reported the same
        # (falsy) way as the other path properties.
        return self._data.get("report_path")

    @report_path.setter
    def report_path(self, path: str) -> None:
        self._validate_path(path)
        self._data["report_path"] = path

    @property
    def log_level(self) -> str:
        """Requested log level; ``"INFO"`` when not supplied."""
        return self._data.get("log_level", "INFO")

    @log_level.setter
    def log_level(self, level: str) -> None:
        self._data["log_level"] = level

    def _validate_path(self, path: str, dir: bool = False) -> None:
        """Check that ``path`` exists and is a file (or a directory if ``dir``).

        Raises:
            ResponseError: ``InvalidParams`` when the path is missing or is of
                the wrong kind.
        """
        if not os.path.exists(path):
            raise ResponseError(
                ErrorCodes.InvalidParams,
                f"path {path} does not exist",
            )
        is_directory = os.path.isdir(path)
        if dir and not is_directory:
            raise ResponseError(
                ErrorCodes.InvalidParams, f"file path is not a directory {path}"
            )
        if not dir and is_directory:
            raise ResponseError(
                ErrorCodes.InvalidParams, f"file path is a directory {path}"
            )
class KaiClientRPCServer:
    """Methods exposed over JSON-RPC by the Kai client."""

    def get_incident_solutions_for_file(self, **kwargs) -> str:
        """Generate a fix for one file and return the updated file contents.

        Keyword arguments are wrapped in ``RPCParams``; ``config_path``,
        ``app_name``, ``input_file_path`` and either ``incidents`` or
        ``report_path`` are read from it.

        Raises:
            ResponseError: ``InvalidParams`` / ``RequestCancelled`` when no
                incidents can be determined, ``InternalError`` when fix
                generation fails.
        """
        rpc_params = RPCParams(**kwargs)
        log.debug(f"got rpc params {rpc_params}")
        config = get_config(rpc_params.config_path)

        self._ensure_file_logging(config.log_dir, rpc_params.log_level)

        model_provider = get_model_provider(config.models)
        incidents = self._resolve_incidents(rpc_params)

        with open(rpc_params.input_file_path, "r") as f:
            file_contents = f.read()

        file_name = os.path.basename(rpc_params.input_file_path)
        start = time.time()
        # Bound before the try so the handlers below stay valid even when
        # creating the trace itself fails.
        trace = None
        try:
            trace = get_trace(
                config,
                model_provider,
                "single",
                rpc_params.app_name,
                file_name,
            )
            trace.start(start)
            prompt = render_prompt(
                trace,
                file_name,
                "java",
                file_contents,
                incidents,
                model_provider,
            )
            result = generate_fix(
                trace,
                config,
                rpc_params.app_name,
                "java",
                rpc_params.input_file_path,
                prompt,
                model_provider,
            )
            return result.updated_file
        except Exception as e:
            if trace is not None:
                trace.exception(-1, -1, e, traceback.format_exc())
            log.debug(f"error processing file: {e}")
        finally:
            end = time.time()
            if trace is not None:
                trace.end(end)
            log.info(
                f"END - completed in '{end-start}s: - App: '{rpc_params.app_name}', File: '{rpc_params.input_file_path}' with {len(incidents)} incidents'"
            )

        # Only reached when the try block failed; ResponseError needs a code.
        raise ResponseError(ErrorCodes.InternalError, "failed to generate fix")

    def _ensure_file_logging(self, log_dir, log_level) -> None:
        """Attach the file log handler once per log directory.

        setup_file_handler adds a new handler on every call, so calling it for
        each request would duplicate log lines and leak file handles. The
        level of the first request for a directory is the one that applies.
        """
        configured = self.__dict__.setdefault("_file_log_dirs", set())
        if log_dir in configured:
            return
        setup_file_handler(
            parent_log,
            "kai_server.log",
            log_dir,
            log_level,
            silent=True,
        )
        configured.add(log_dir)

    def _resolve_incidents(self, rpc_params):
        """Return the incidents to fix, from the request or from the report."""
        raw_incidents = rpc_params.incidents
        if raw_incidents:
            if not isinstance(raw_incidents, (str, bytes, bytearray)):
                # Already decoded (sent as a JSON array rather than a string).
                return raw_incidents
            try:
                return json.loads(raw_incidents)
            except json.JSONDecodeError as err:
                raise ResponseError(
                    ErrorCodes.InvalidParams,
                    f"params.incidents is not valid JSON: {err}",
                ) from err

        if rpc_params.report_path:
            impacted_files = get_impacted_files_from_report(rpc_params.report_path)
            # Report entries are matched as a suffix of the input file path.
            for file_suffix, file_incidents in impacted_files.items():
                if rpc_params.input_file_path.endswith(file_suffix):
                    return file_incidents
            raise ResponseError(ErrorCodes.RequestCancelled, "no incidents to fix")

        raise ResponseError(
            ErrorCodes.InvalidParams,
            "either params.incidents or params.report_path is required",
        )
def run_rpc_server():
    """Serve JSON-RPC requests on stdin/stdout until the stream closes.

    Debug logging for the ``kai-rpc`` logger is written to ``server.log`` in
    the current working directory.
    """
    # filter warnings so stdout is not polluted
    for category in (RuntimeWarning, DeprecationWarning):
        filterwarnings("ignore", category=category)

    file_handler = logging.FileHandler("server.log")
    file_handler.setFormatter(
        logging.Formatter(
            "%(levelname)s - %(asctime)s - %(name)s - [%(filename)s:%(lineno)s - %(funcName)s()] - %(message)s"
        )
    )
    log.setLevel(logging.DEBUG)
    log.addHandler(file_handler)

    kai = KaiClientRPCServer()
    # the library gives us a client, we use it as a LSP server by switching stdin / stdout
    endpoint = CustomRpcEndpoint(sys.stdout, sys.stdin)
    server = CustomRpcServer(
        json_rpc_endpoint=endpoint,
        method_callbacks={
            "get_incident_solutions_for_file": kai.get_incident_solutions_for_file,
        },
        timeout=60,
    )
    # run() is called directly, so the loop executes in the calling thread.
    server.run()


if __name__ == "__main__":
    try:
        run_rpc_server()
    except Exception as e:
        log.error(f"{traceback.format_exc()}")
        log.error(f"failed running the server {e}")
        # Report the failure to the parent process instead of exiting with 0.
        sys.exit(1)
// Example JSON-RPC client: spawns the Kai client binary and requests a fix
// for a single file over the child's stdin/stdout.
const cp = require("child_process");
const rpc = require("vscode-jsonrpc/node");
const fs = require("fs");

// argv[0] is node and argv[1] is this script, so four user arguments are
// needed.
if (process.argv.length < 6) {
  console.error(
    "All arguments are required\nUsage: node rpc-client.js <kai_toml_config> <app_name> <report_path> <input_file_path>",
  );
  process.exit(1);
}

const [kaiConfigToml, appName, reportPath, inputFilePath] =
  process.argv.slice(2);

const binaryPath = "./dist/cli";
if (!fs.existsSync(binaryPath)) {
  console.error(
    `Kai client binary not found at path ${binaryPath}, build a binary by running 'pyinstaller build.spec'`,
  );
  process.exit(1);
}

const params = {
  app_name: appName,
  report_path: reportPath,
  input_file_path: inputFilePath,
  config_path: kaiConfigToml,
};

// The child's stderr is forwarded; stdin/stdout carry the JSON-RPC traffic.
const rpcServer = cp.spawn(binaryPath, [], {
  stdio: ["pipe", "pipe", process.stderr],
});

// A spawn failure is emitted as an 'error' event; without a listener it
// would surface as an uncaught exception.
rpcServer.on("error", (error) => {
  console.error(error);
  console.error(`failed to start Kai client binary ${binaryPath}`);
  process.exitCode = 1;
});

// Delay before the first request; presumably gives the packaged binary time
// to start up -- TODO confirm whether the wait is still needed.
const STARTUP_DELAY_MS = 4000;

setTimeout(() => {
  const connection = rpc.createMessageConnection(
    new rpc.StreamMessageReader(rpcServer.stdout),
    new rpc.StreamMessageWriter(rpcServer.stdin),
  );

  console.log("created rpc process");
  connection.listen();
  connection
    // The server expects the method arguments under the "kwargs" key.
    .sendRequest("get_incident_solutions_for_file", { kwargs: params })
    .then((result) => {
      console.log(result);
      console.log("\nReceived response successfully!");
    })
    .catch((error) => {
      console.error(error);
      console.error("error generating fix");
      // Let callers and scripts detect the failure.
      process.exitCode = 1;
    })
    .finally(() => {
      connection.dispose();
      rpcServer.kill();
    });
}, STARTUP_DELAY_MS);
def main():
    """Run the packaged Kai client and request a fix for one file.

    Expects four command-line arguments, in order: Kai config path,
    application name, analysis report path and input file path. Exits with
    status 1 when arguments are missing or the binary has not been built.
    """
    import os  # local import: only this function needs it

    # Validate the command line before creating log files or child processes.
    if len(sys.argv) < 5:
        print(
            "All arguments are required\n"
            "Usage: python rpc-client.py <kai_toml_config> <app_name> <report_path> <input_file_path>",
            file=sys.stderr,
        )
        sys.exit(1)

    binary_path = "./dist/cli"
    if not os.path.exists(binary_path):
        print(
            f"Kai client binary not found at path {binary_path}, build a binary by running 'pyinstaller build.spec'",
            file=sys.stderr,
        )
        sys.exit(1)

    file_handler = logging.FileHandler("client.log")
    kaiRpcClient.log.addHandler(file_handler)
    kaiRpcClient.log.setLevel(logging.DEBUG)

    request_params = {
        "config_path": sys.argv[1],
        "app_name": sys.argv[2],
        "report_path": sys.argv[3],
        "input_file_path": sys.argv[4],
    }

    # trunk-ignore-begin(bandit/B603)
    rpc_server = subprocess.Popen(
        [binary_path],
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        text=True,
    )
    # trunk-ignore-end(bandit/B603)

    rpc = None
    try:
        rpc = kaiRpcClient.CustomRpcServer(
            json_rpc_endpoint=kaiRpcClient.CustomRpcEndpoint(
                rpc_server.stdin, rpc_server.stdout
            ),
            timeout=60,
        )
        rpc.start()
        print(f"running get_incident_solutions_for_file() with params {request_params}")
        response = rpc.call_method(
            "get_incident_solutions_for_file",
            kwargs=request_params,
        )
        print(response)
        print("\nReceived response successfully!")
    finally:
        # Always tear the child process down, even when start-up or the call
        # failed.
        rpc_server.stdin.close()
        rpc_server.stdout.close()
        rpc_server.terminate()
        rpc_server.wait()
        if rpc is not None:
            rpc.stop()


if __name__ == "__main__":
    main()