Skip to content

Commit

Permalink
ci: check for clangd diagnostics
Browse files Browse the repository at this point in the history
This is a followup from #2340, which adds a check to CI for clangd
diagnostics. It is added to GCC because clang is the slower of the two
linux runners right now.

The script itself spawns clangd and speaks Language Server Protocol to
it. We open files, and clangd returns diagnostics (if any). This enables
a fairly straightforward way of checking an arbitrary number of source
files for warnings/errors.

Simply use `make clangd-diagnostics` to run the tool. The variables
`CLANGD_DIAGNOSTIC_INSTANCES` and `CLANGD_DIAGNOSTIC_JOBS` control the
parallelism.
  • Loading branch information
Riolku authored and benjaminwinger committed Nov 6, 2023
1 parent e5d8bd5 commit fa3b79f
Show file tree
Hide file tree
Showing 8 changed files with 249 additions and 6 deletions.
5 changes: 5 additions & 0 deletions .github/workflows/ci-workflow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ jobs:
env:
NUM_THREADS: 32
TEST_JOBS: 16
CLANGD_DIAGNOSTIC_JOBS: 32
CLANGD_DIAGNOSTIC_INSTANCES: 6
GEN: ninja
CC: gcc
CXX: g++
Expand Down Expand Up @@ -66,6 +68,9 @@ jobs:
file: cover.info
functionalities: "search"

- name: Check for clangd diagnostics
run: make clangd-diagnostics

- name: C and C++ Examples
run: |
make example
Expand Down
6 changes: 6 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ NUM_THREADS ?= 1
TEST_JOBS ?= 10
SANITIZER_FLAG=
WERROR_FLAG=
CLANGD_DIAGNOSTIC_INSTANCES ?= 4
ROOT_DIR=$(CURDIR)

export CMAKE_BUILD_PARALLEL_LEVEL=$(NUM_THREADS)
Expand Down Expand Up @@ -107,6 +108,11 @@ tidy: clangd
run-clang-tidy -p build/release -quiet -j $(NUM_THREADS) \
"^$(realpath src)|$(realpath tools)/(?!shell/linenoise.cpp)|$(realpath examples)"

clangd-diagnostics: clangd
find src -name *.h -or -name *.cpp | xargs \
./scripts/get-clangd-diagnostics.py --compile-commands-dir build/release \
-j $(NUM_THREADS) --instances $(CLANGD_DIAGNOSTIC_INSTANCES)

pytest: release
cd $(ROOT_DIR)/tools/python_api/test && \
python3 -m pytest -v test_main.py
Expand Down
3 changes: 2 additions & 1 deletion scripts/dockerized-ci-tests-runner/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ RUN wget https://apt.llvm.org/llvm.sh && \
rm llvm.sh && \
ln /usr/bin/clang++-17 /usr/bin/clang++ && \
ln /usr/bin/clang-17 /usr/bin/clang && \
ln /usr/bin/run-clang-tidy-17 /usr/bin/run-clang-tidy
ln /usr/bin/run-clang-tidy-17 /usr/bin/run-clang-tidy && \
ln /usr/bin/clangd-17 /usr/bin/clangd

ENV JAVA_HOME=/usr/lib/jvm/java-17-openjdk-amd64
RUN useradd --create-home runner
Expand Down
233 changes: 233 additions & 0 deletions scripts/get-clangd-diagnostics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,233 @@
#!/usr/bin/env python
import argparse
import io
import json
import multiprocessing
import os
import subprocess
import sys


class LSPClient:
    """A minimal Language Server Protocol client speaking JSON-RPC to a
    spawned ``clangd`` child process over stdin/stdout pipes.

    Framing follows LSP base protocol: each message is a
    ``Content-Length: N`` header, a blank line, then ``N`` of JSON payload.
    """

    def __init__(self, *, compile_commands_dir=None, jobs=None, verbose=False):
        """Spawn a clangd instance.

        :param compile_commands_dir: directory containing compile_commands.json,
            forwarded to clangd's ``--compile-commands-dir`` flag.
        :param jobs: number of clangd worker threads (``-j``); defaults to the
            CPU count.
        :param verbose: if true, let clangd inherit our stderr for debug output;
            otherwise suppress it.
        """
        # Resolve the default BEFORE building argv; otherwise the literal
        # string "None" would be passed to clangd's -j flag.
        if jobs is None:
            jobs = multiprocessing.cpu_count()

        args = ["clangd", "-j", str(jobs)]

        if compile_commands_dir is not None:
            args += ["--compile-commands-dir", compile_commands_dir]

        # Monotonic JSON-RPC request id; bumped by send_request.
        self.id = 0
        self.child = subprocess.Popen(
            args,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            # Let clangd inherit our stderr, or suppress it entirely.
            stderr=None if verbose else subprocess.DEVNULL,
        )
        # newline="\r\n" so readline() splits on LSP's CRLF header terminator.
        self.stdin = io.TextIOWrapper(self.child.stdin, newline="\r\n")
        self.stdout = io.TextIOWrapper(self.child.stdout, newline="\r\n")

    def request(self, method, params):
        """Send a request and block for its response, returning the result."""
        self.send_request(method, params)
        return self.recv_response()

    def send_request(self, method, params):
        """Send a JSON-RPC request (a message carrying an ``id``)."""
        self.id += 1
        self.send_json(
            dict(
                id=self.id,
                jsonrpc="2.0",
                method=method,
                params=params,
            )
        )

    def send_json(self, json_data):
        """Serialize and write one LSP-framed message to clangd's stdin."""
        data = json.dumps(json_data)
        bindata = data.encode("utf-8")
        # NOTE(review): the header advertises the UTF-8 *byte* length while the
        # text wrapper writes characters; equal only for ASCII payloads (true
        # for every message this script sends) — confirm if non-ASCII paths
        # are ever added.
        header = f"Content-Length: {len(bindata)}\r\n\r\n"

        self.stdin.write(header + data)
        self.stdin.flush()

    def recv_response(self):
        """Read one message and validate it as the reply to our last request."""
        json_data = self.recv_json()
        assert json_data["id"] == self.id
        assert "error" not in json_data
        return json_data["result"]

    def send_notif(self, method, params):
        """Send a JSON-RPC notification (no ``id``, so no reply expected)."""
        self.send_json(
            dict(
                jsonrpc="2.0",
                method=method,
                params=params,
            )
        )

    def expect_notif(self, method):
        """Read one message, assert it is the named notification, return params."""
        json_data = self.recv_json()
        assert json_data["method"] == method
        assert "error" not in json_data
        return json_data["params"]

    def recv_json(self):
        """Read one LSP-framed message from clangd and decode its JSON body."""
        header = self.stdout.readline()
        content_len_header = "Content-Length: "
        assert header.startswith(content_len_header)
        assert header.endswith("\r\n")
        # Strip the prefix and the trailing CRLF to isolate the length.
        data_len = int(header[len(content_len_header) : -2])

        # Expect end of header
        assert self.stdout.read(2) == "\r\n"

        data = self.stdout.read(data_len)
        return json.loads(data)

    def initialize(self, project):
        """Perform the LSP ``initialize`` handshake rooted at *project*."""
        return self.request(
            "initialize",
            dict(
                processId=os.getpid(),
                rootUri="file://" + project,
                capabilities={},
            ),
        )

    def open_file(self, file):
        """Notify clangd that *file* is open, triggering diagnostics for it."""
        with open(file) as f:
            file_content = f.read()

        self.send_notif(
            "textDocument/didOpen",
            dict(
                textDocument=dict(
                    uri="file://" + os.path.realpath(file),
                    languageId="cpp",
                    version=1,
                    text=file_content,
                )
            ),
        )

    def show_diagnostics(self):
        """Read one publishDiagnostics notification and print its contents.

        Returns True iff the notification carried at least one diagnostic.
        """
        diagnostics_response = self.expect_notif("textDocument/publishDiagnostics")
        uri = diagnostics_response["uri"]
        file_prefix = "file://"
        assert uri.startswith(file_prefix)
        file = uri[len(file_prefix) :]
        diagnostics = diagnostics_response["diagnostics"]
        for diagnostic in diagnostics:
            LSPClient.show_diagnostic(file, diagnostic)

        return len(diagnostics) != 0

    @staticmethod
    def show_diagnostic(file, diagnostic):
        """Print one diagnostic as ``file:line:message``."""
        # Renamed from `range` to avoid shadowing the builtin.
        rng = diagnostic["range"]
        start = rng["start"]
        line = start["line"]
        message = diagnostic["message"]
        print(f"{file}:{line}:{message}")

    def send_shutdown(self):
        """Issue the LSP ``shutdown`` request (reply is not read here)."""
        self.send_request("shutdown", None)

    def recv_shutdown_send_exit(self):
        """Send the ``exit`` notification so clangd terminates.

        NOTE(review): despite the name, the shutdown *response* is never read
        from the pipe; clangd exits anyway. Confirm before "fixing" — other
        queued notifications could make a blind read misfire.
        """
        self.send_notif("exit", None)

    def wait(self):
        """Reap the child and require a clean (zero) exit status."""
        assert self.child.wait() == 0


def shutdown_all(clients):
    """Shut down every client, pipelining the LSP shutdown handshake.

    All shutdown requests go out before any exit notifications, and all
    exits before any waits, so the servers wind down concurrently. This
    drastically speeds up cleanup time.
    """
    phases = (
        lambda c: c.send_shutdown(),
        lambda c: c.recv_shutdown_send_exit(),
        lambda c: c.wait(),
    )
    for phase in phases:
        for client in clients:
            phase(client)


def get_clients(client_count, compile_commands_dir, total_jobs, verbose):
    """Spawn ``client_count`` clangd instances sharing ``total_jobs`` threads.

    Jobs are divided as evenly as possible; clients near the front absorb
    the remainder, mirroring how files are distributed to them later.
    """
    base, extra = divmod(total_jobs, client_count)
    per_client = [base + 1] * extra + [base] * (client_count - extra)
    return [
        LSPClient(compile_commands_dir=compile_commands_dir, jobs=n, verbose=verbose)
        for n in per_client
    ]


def get_diagnostics(files, *, client_count, compile_commands_dir, total_jobs, verbose):
    """Check *files* with a pool of clangd servers, printing any diagnostics.

    Returns a truthy value iff at least one diagnostic was reported, so the
    caller can use it directly as a process exit status.
    """
    # Spawning more servers than worker threads is pointless; clamp and warn.
    if client_count > total_jobs:
        print(f"Client count {client_count} is greater than total jobs {total_jobs}. Forcing the client count to {total_jobs}.", file=sys.stderr)
        client_count = total_jobs

    project = os.getcwd()

    # Likewise never spawn more servers than there are files to check.
    client_count = min(client_count, len(files))
    clients = get_clients(client_count, compile_commands_dir, total_jobs, verbose)
    for client in clients:
        client.initialize(project)

    # Deal files round-robin, mirroring the job distribution, and remember
    # how many each server owns so we can collect exactly that many
    # publishDiagnostics notifications from it.
    files_per_client = [0] * client_count
    for index, path in enumerate(files):
        owner = index % client_count
        clients[owner].open_file(path)
        files_per_client[owner] += 1

    found_any = 0
    for owned, client in zip(files_per_client, clients):
        for _ in range(owned):
            found_any |= client.show_diagnostics()

    shutdown_all(clients)
    return found_any


def main():
    """Parse command-line arguments and run the diagnostics scan.

    Returns the value of :func:`get_diagnostics`, which is truthy when any
    diagnostic was found — suitable as a process exit status.
    """
    parser = argparse.ArgumentParser(
        prog="get-clangd-diagnostics.py",
        description="Scan project for any clangd diagnostics (including warnings) and outputs them.",
    )
    parser.add_argument("files", nargs="+", help="Files to scan")
    parser.add_argument(
        "--instances",
        type=int,
        default=4,
        help="Number of clangd instances to spawn in parallel. Defaults to 4.",
    )
    parser.add_argument(
        "-j",
        "--jobs",
        type=int,
        help="Number of total jobs across all servers. Defaults to the CPU count.",
    )
    parser.add_argument(
        "-p",
        "--compile-commands-dir",
        # Fixed typo in user-facing help text ("containining").
        help="Directory containing compile_commands.json",
    )
    parser.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        help="Show clangd debug output",
    )

    args = parser.parse_args(sys.argv[1:])
    # Resolve the job default here so get_diagnostics always sees an int.
    jobs = args.jobs if args.jobs is not None else multiprocessing.cpu_count()
    return get_diagnostics(
        args.files,
        client_count=args.instances,
        compile_commands_dir=args.compile_commands_dir,
        total_jobs=jobs,
        verbose=args.verbose,
    )


# Guard the entry point so importing this module (e.g. for testing) does not
# immediately run the scan and exit the interpreter.
if __name__ == "__main__":
    sys.exit(main())
4 changes: 1 addition & 3 deletions src/binder/bind/ddl/bind_create_rdf_graph.cpp
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
#include "binder/binder.h"
#include "binder/ddl/bound_create_table.h"
#include "catalog/rdf_graph_schema.h"
#include "catalog/rel_table_schema.h"
#include "common/keyword/rdf_keyword.h"
#include "common/types/rdf_variant_type.h"
#include "parser/ddl/create_table.h"
#include "parser/ddl/create_table_info.h"

using namespace kuzu::parser;
using namespace kuzu::common;
Expand Down
1 change: 0 additions & 1 deletion src/include/common/constants.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#pragma once

#include <cstdint>
#include <string_view>

namespace kuzu {
namespace common {
Expand Down
2 changes: 1 addition & 1 deletion src/include/common/keyword/rdf_keyword.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#pragma once

#include <string>
#include <string_view>

namespace kuzu {
namespace common {
Expand Down
1 change: 1 addition & 0 deletions src/include/common/null_mask.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#pragma once

#include <algorithm>
#include <memory>
#include <utility>

Expand Down

0 comments on commit fa3b79f

Please sign in to comment.