Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix CLI utf8 issue #1080

Merged
merged 1 commit into from
Dec 6, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 28 additions & 2 deletions tools/shell/embedded_shell.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@

#include "common/logging_level_utils.h"
#include "common/type_utils.h"
#include "utf8proc.h"
#include "utf8proc_wrapper.h"

using namespace kuzu::utf8proc;

namespace kuzu {
namespace main {
Expand Down Expand Up @@ -131,13 +135,35 @@ void completion(const char* buffer, linenoiseCompletions* lc) {

void highlight(char* buffer, char* resultBuf, uint32_t maxLen, uint32_t cursorPos) {
string buf(buffer);
auto bufLen = buf.length();
ostringstream highlightBuffer;
string word;
vector<string> tokenList;
if (cursorPos > maxLen) {
buf = buf.substr(cursorPos - maxLen, maxLen);
uint32_t counter = 0;
uint32_t thisChar = 0;
uint32_t lineLen = 0;
while (counter < cursorPos) {
counter += Utf8Proc::renderWidth(buffer, thisChar);
thisChar = utf8proc_next_grapheme(buffer, bufLen, thisChar);
}
lineLen = thisChar;
while (counter > cursorPos - maxLen + 1) {
counter -= Utf8Proc::renderWidth(buffer, thisChar);
thisChar = Utf8Proc::previousGraphemeCluster(buffer, bufLen, thisChar);
}
lineLen -= thisChar;
buf = buf.substr(thisChar, lineLen);
bufLen = buf.length();
} else if (buf.length() > maxLen) {
buf = buf.substr(0, maxLen);
uint32_t counter = 0;
uint32_t lineLen = 0;
while (counter < maxLen) {
counter += Utf8Proc::renderWidth(buffer, lineLen);
lineLen = utf8proc_next_grapheme(buffer, bufLen, lineLen);
}
buf = buf.substr(0, lineLen);
bufLen = buf.length();
}
for (auto i = 0u; i < buf.length(); i++) {
if ((buf[i] != ' ' && !word.empty() && word[0] == ' ') ||
Expand Down
143 changes: 101 additions & 42 deletions tools/shell/linenoise.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,6 @@
*
*/

#include "linenoise.h"

#include <ctype.h>
#include <errno.h>
#include <poll.h>
Expand All @@ -119,6 +117,7 @@
#include <cstddef>
#include <string>

#include "linenoise.h"
#include "utf8proc.h"
#include "utf8proc_wrapper.h"
#include <sys/ioctl.h>
Expand Down Expand Up @@ -155,6 +154,7 @@ struct linenoiseState {
size_t pos; /* Current cursor position. */
size_t oldpos; /* Previous refresh cursor position. */
size_t len; /* Current edited line length. */
size_t chars; /* Number of utf-8 chars in buffer. */
size_t cols; /* Number of columns in terminal. */
size_t maxrows; /* Maximum num of rows used so far (multiline mode) */
int history_index; /* The history index we are currently editing. */
Expand Down Expand Up @@ -189,6 +189,8 @@ static void refreshLine(struct linenoiseState* l);
std::string oldInput = "";
bool inputLeft;

std::string utf8store = "";

/* Debugging macro. */
#if 0
FILE *lndebug_fp = NULL;
Expand Down Expand Up @@ -566,6 +568,19 @@ uint32_t linenoiseComputeRenderWidth(const char* buf, size_t len) {
return renderWidth;
}

int getBufCharPos(struct linenoiseState* l) {
if (l->pos == l->chars) {
return l->len;
}
size_t idx = 0;
size_t ctr = 0;
while (ctr < l->pos) {
idx = utf8proc_next_grapheme(l->buf, l->len, idx);
ctr++;
}
return idx;
}

/* Single line low level line refresh.
*
* Rewrite the currently edited line accordingly to the buffer content,
Expand All @@ -577,7 +592,9 @@ static void refreshSingleLine(struct linenoiseState* l) {
char buf[LINENOISE_MAX_LINE];
size_t len = l->len;
size_t pos = l->pos;
size_t chars = l->chars;
struct abuf ab;
uint32_t renderWidth = 0;
uint32_t renderPos = 0;

if (Utf8Proc::isValid(l->buf, l->len)) {
Expand All @@ -587,13 +604,15 @@ static void refreshSingleLine(struct linenoiseState* l) {
size_t charPos = 0;
size_t prevPos = 0;
size_t totalRenderWidth = 0;
uint32_t posCounter = 0;
while (charPos < len) {
uint32_t charRenderWidth = Utf8Proc::renderWidth(l->buf, charPos);
prevPos = charPos;
charPos = utf8proc_next_grapheme(l->buf, len, charPos);
totalRenderWidth += charPos - prevPos;
posCounter++;
totalRenderWidth += charRenderWidth;
if (totalRenderWidth >= remainingRenderWidth) {
if (prevPos >= l->pos) {
if (prevPos >= getBufCharPos(l)) {
// We passed the cursor: break, we no longer need to render.
charPos = prevPos;
break;
Expand All @@ -603,17 +622,18 @@ static void refreshSingleLine(struct linenoiseState* l) {
while (totalRenderWidth >= remainingRenderWidth) {
uint32_t startCharWidth = Utf8Proc::renderWidth(l->buf, startPos);
uint32_t newStart = utf8proc_next_grapheme(l->buf, len, startPos);
totalRenderWidth -= newStart - startPos;
totalRenderWidth -= startCharWidth;
startPos = newStart;
renderPos -= startCharWidth;
renderWidth -= startCharWidth;
}
}
}
if (prevPos < l->pos) {
if (posCounter <= l->pos) {
renderWidth += charRenderWidth;
renderPos += charRenderWidth;
}
}
highlightCallback(l->buf, buf, totalRenderWidth, l->pos);
highlightCallback(l->buf, buf, totalRenderWidth, renderPos);
len = strlen(buf);
} else {
// Invalid UTF8: fallback.
Expand All @@ -625,7 +645,7 @@ static void refreshSingleLine(struct linenoiseState* l) {
while (plen + len > l->cols) {
len--;
}
renderPos = pos;
renderWidth = pos;
}

abInit(&ab);
Expand All @@ -635,7 +655,7 @@ static void refreshSingleLine(struct linenoiseState* l) {
/* Write the prompt and the current buffer content */
abAppend(&ab, l->prompt, strlen(l->prompt));
if (maskmode == 1) {
while (len--)
while (chars--)
abAppend(&ab, "*", 1);
} else {
abAppend(&ab, buf, len);
Expand All @@ -646,8 +666,9 @@ static void refreshSingleLine(struct linenoiseState* l) {
snprintf(seq, 64, "\x1b[0K");
abAppend(&ab, seq, strlen(seq));
/* Move cursor to original position. */
snprintf(seq, 64, "\r\x1b[%dC", (int)(renderPos + plen));
snprintf(seq, 64, "\r\x1b[%dC", (int)(renderWidth + plen));
abAppend(&ab, seq, strlen(seq));

if (write(fd, ab.b, ab.len) == -1) {} /* Can't recover from write error. */
abFree(&ab);
}
Expand Down Expand Up @@ -769,51 +790,84 @@ bool pastedInput(int ifd) {
* On error writing to the terminal -1 is returned, otherwise 0. */
int linenoiseEditInsert(struct linenoiseState* l, char c) {
if (l->len < l->buflen) {
if (l->len == l->pos) {
l->buf[l->pos] = c;
l->pos++;
if (l->chars == l->pos) {
l->buf[l->len] = c;
l->len++;
l->buf[l->len] = '\0';
if ((!mlmode && l->plen + l->len < l->cols && !hintsCallback)) {

if ((!mlmode && !hintsCallback)) {
char d = (maskmode == 1) ? '*' : c;
if (write(l->ofd, &d, 1) == -1)
return -1;
if (!Utf8Proc::isValid(&d, 1)) {
utf8store += d;
if (Utf8Proc::isValid(utf8store.c_str(), utf8store.length())) {
utf8store = "";
l->chars++;
l->pos++;
}
} else {
if (l->plen + l->chars < l->cols) {
if (write(l->ofd, &d, 1) == -1)
return -1;
}
l->chars++;
l->pos++;
}
}
if (!pastedInput(l->ifd) && !inputLeft) {
refreshLine(l);
}
} else {
memmove(l->buf + l->pos + 1, l->buf + l->pos, l->len - l->pos);
l->buf[l->pos] = c;
l->len++;
l->pos++;
l->buf[l->len] = '\0';
refreshLine(l);
if (!Utf8Proc::isValid(&c, 1)) {
utf8store += c;
l->len++;
}
if (Utf8Proc::isValid(utf8store.c_str(), utf8store.length())) {
uint32_t charLoc = getBufCharPos(l);
uint32_t charWidth = std::max((uint32_t)utf8store.length(), 1u);
memmove(l->buf + charLoc + charWidth, l->buf + charLoc, l->len - charLoc);
if (utf8store != "") {
for (auto i = 0u; i < utf8store.length(); i++) {
l->buf[charLoc + i] = utf8store[i];
}
utf8store = "";
l->pos++;
l->chars++;
l->buf[l->len] = '\0';
refreshLine(l);
} else {
l->buf[l->pos] = c;
l->len++;
l->pos++;
l->chars++;
l->buf[l->len] = '\0';
refreshLine(l);
}
}
}
}
return 0;
}

static uint32_t prevChar(struct linenoiseState* l) {
return Utf8Proc::previousGraphemeCluster(l->buf, l->len, l->pos);
return Utf8Proc::previousGraphemeCluster(l->buf, l->len, getBufCharPos(l));
}

static uint32_t nextChar(struct linenoiseState* l) {
return utf8proc_next_grapheme(l->buf, l->len, l->pos);
return utf8proc_next_grapheme(l->buf, l->len, getBufCharPos(l));
}

/* Move cursor on the left. */
void linenoiseEditMoveLeft(struct linenoiseState* l) {
if (l->pos > 0) {
l->pos = prevChar(l);
l->pos--;
refreshLine(l);
}
}

/* Move cursor on the right. */
void linenoiseEditMoveRight(struct linenoiseState* l) {
if (l->pos != l->len) {
l->pos = nextChar(l);
if (l->pos != l->chars) {
l->pos++;
refreshLine(l);
}
}
Expand All @@ -829,19 +883,19 @@ void linenoiseEditMoveWordLeft(struct linenoiseState* l) {
return;
}
do {
l->pos = prevChar(l);
} while (l->pos > 0 && !isWordSeparator(l->buf[l->pos]));
l->pos--;
} while (l->pos > 0 && !isWordSeparator(l->buf[getBufCharPos(l)]));
refreshLine(l);
}

/* Move cursor one word to the right */
void linenoiseEditMoveWordRight(struct linenoiseState* l) {
if (l->pos == l->len) {
if (l->pos == l->chars) {
return;
}
do {
l->pos = nextChar(l);
} while (l->pos != l->len && !isWordSeparator(l->buf[l->pos]));
l->pos++;
} while (l->pos != l->chars && !isWordSeparator(l->buf[getBufCharPos(l)]));
refreshLine(l);
}

Expand All @@ -855,8 +909,8 @@ void linenoiseEditMoveHome(struct linenoiseState* l) {

/* Move cursor to the end of the line. */
void linenoiseEditMoveEnd(struct linenoiseState* l) {
if (l->pos != l->len) {
l->pos = l->len;
if (l->pos != l->chars) {
l->pos = l->chars;
refreshLine(l);
}
}
Expand Down Expand Up @@ -890,11 +944,13 @@ void linenoiseEditHistoryNext(struct linenoiseState* l, int dir) {
/* Delete the character at the right of the cursor without altering the cursor
* position. Basically this is what happens with the "Delete" keyboard key. */
void linenoiseEditDelete(struct linenoiseState* l) {
if (l->len > 0 && l->pos < l->len) {
if (l->len > 0 && l->pos < l->chars) {
uint32_t newPos = nextChar(l);
uint32_t charSize = newPos - l->pos;
memmove(l->buf + l->pos, l->buf + newPos, l->len - newPos);
uint32_t oldPos = getBufCharPos(l);
uint32_t charSize = newPos - oldPos;
memmove(l->buf + oldPos, l->buf + newPos, l->len - oldPos);
l->len -= charSize;
l->chars--;
l->buf[l->len] = '\0';
refreshLine(l);
}
Expand All @@ -904,10 +960,12 @@ void linenoiseEditDelete(struct linenoiseState* l) {
void linenoiseEditBackspace(struct linenoiseState* l) {
if (l->pos > 0 && l->len > 0) {
uint32_t newPos = prevChar(l);
uint32_t charSize = l->pos - newPos;
memmove(l->buf + newPos, l->buf + l->pos, l->len - l->pos);
uint32_t oldPos = getBufCharPos(l);
uint32_t charSize = oldPos - newPos;
memmove(l->buf + newPos, l->buf + oldPos, l->len - newPos);
l->len -= charSize;
l->pos = newPos;
l->pos--;
l->chars--;
l->buf[l->len] = '\0';
refreshLine(l);
}
Expand Down Expand Up @@ -951,6 +1009,7 @@ static int linenoiseEdit(
l.plen = strlen(prompt);
l.oldpos = l.pos = 0;
l.len = 0;
l.chars = 0;
l.cols = getColumns(stdin_fd, stdout_fd);
l.maxrows = 0;
l.history_index = 0;
Expand All @@ -968,7 +1027,7 @@ static int linenoiseEdit(
while (1) {
char c;
int nread;
char seq[3];
char seq[5];

if (inputLeft) {
c = oldInput[0];
Expand Down