Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
nicolo-ribaudo committed Jul 24, 2024
1 parent 98e7727 commit a8f7fba
Show file tree
Hide file tree
Showing 2 changed files with 171 additions and 63 deletions.
80 changes: 79 additions & 1 deletion test/unit/pdf_find_controller_spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,8 @@ class MockLinkService extends SimpleLinkService {

async function initPdfFindController(
filename,
updateMatchesCountOnProgress = true
updateMatchesCountOnProgress = true,
matcher = undefined
) {
const loadingTask = getDocument(
buildGetDocumentParams(filename || tracemonkeyFileName, {
Expand All @@ -69,6 +70,7 @@ async function initPdfFindController(
linkService,
eventBus,
updateMatchesCountOnProgress,
matcher,
});
pdfFindController.setDocument(pdfDocument); // Enable searching.

Expand Down Expand Up @@ -1054,4 +1056,80 @@ describe("pdf_find_controller", function () {
const { eventBus } = await initPdfFindController();
await testOnFind({ eventBus });
});

describe("custom matcher", () => {
  it("calls to the matcher with the right arguments", async () => {
    const QUERY = "Foo bar";
    // The matcher is expected to be invoked once for every page of the
    // default test document.
    const PAGES_COUNT = 14;

    // Report exactly one (dummy) match on every page, so that the search
    // finishes with a predictable per-page result.
    const spy = jasmine
      .createSpy("custom find matcher")
      .and.callFake(() => [{ index: 0, length: 1 }]);

    const { eventBus, pdfFindController } = await initPdfFindController(
      null,
      false,
      spy
    );

    await testSearch({
      eventBus,
      pdfFindController,
      state: { query: QUERY },
      selectedMatch: { pageIndex: 0, matchIndex: 0 },
      // Derived from `PAGES_COUNT` so the two cannot get out of sync.
      matchesPerPage: new Array(PAGES_COUNT).fill(1),
    });

    expect(spy).toHaveBeenCalledTimes(PAGES_COUNT);

    // The matcher receives `(query, pageContent, pageIndex)`; check the first
    // and last argument for every call.
    for (let i = 0; i < PAGES_COUNT; i++) {
      const args = spy.calls.argsFor(i);
      expect(args[0]).withContext(`page ${i}`).toBe(QUERY);
      expect(args[2]).withContext(`page ${i}`).toBe(i);
    }

    // Spot-check the page contents passed as second argument.
    expect(spy.calls.argsFor(0)[1]).toMatch(/^Trace-based /);
    expect(spy.calls.argsFor(1)[1]).toMatch(/^Hence, recording and /);
    // The dot is escaped so that it only matches a literal ".".
    expect(spy.calls.argsFor(12)[1]).toMatch(/Figure 12\. Fraction of time /);
    expect(spy.calls.argsFor(13)[1]).toMatch(/^not be interpreted as /);
  });

  it("uses the results returned by the custom matcher", async () => {
    const QUERY = "Foo bar";

    // By default the matcher reports nothing (`undefined`); only the pages
    // with index 0, 2 and 13 return matches.
    // prettier-ignore
    const spy = jasmine.createSpy("custom find matcher")
      .and.returnValue(undefined)
      .withArgs(QUERY, jasmine.anything(), 0)
        .and.returnValue([
          { index: 20, length: 3 },
          { index: 50, length: 8 },
        ])
      .withArgs(QUERY, jasmine.anything(), 2)
        .and.returnValue([
          { index: 7, length: 19 }
        ])
      .withArgs(QUERY, jasmine.anything(), 13)
        .and.returnValue([
          { index: 50, length: 2 },
          { index: 54, length: 9 },
          { index: 80, length: 4 },
        ]);

    const { eventBus, pdfFindController } = await initPdfFindController(
      null,
      false,
      spy
    );

    // The per-page counts must mirror what the matcher returned above.
    await testSearch({
      eventBus,
      pdfFindController,
      state: { query: QUERY },
      selectedMatch: { pageIndex: 0, matchIndex: 0 },
      matchesPerPage: [2, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3],
    });
  });
});
});
154 changes: 92 additions & 62 deletions web/pdf_find_controller.js
Original file line number Diff line number Diff line change
Expand Up @@ -368,13 +368,32 @@ function getOriginalIndex(diffs, pos, len) {
return [oldStart, oldLen];
}

/**
* @callback PDFFindMatcher
* @this {PDFFindController}
* @param {string | string[]} query - The search query.
* @param {string} pageContent - The text content of the page to search in.
* @param {number} pageIndex - The index of the page that is being processed.
* @returns {Promise<SingleFindMatch[]> | SingleFindMatch[] | undefined} An
* array of matches in the provided page, optionally wrapped in a promise, or
* `undefined` if there are no matches.
*/

/**
* @typedef {Object} SingleFindMatch
* @property {number} index - The start of the matched text in the page's string
* contents.
* @property {number} length - The length of the matched text.
*/

/**
* @typedef {Object} PDFFindControllerOptions
* @property {IPDFLinkService} linkService - The navigation/linking service.
* @property {EventBus} eventBus - The application event bus.
* @property {boolean} [updateMatchesCountOnProgress] - True if the matches
* count must be updated on progress or only when the last page is reached.
* The default value is `true`.
* @property {PDFFindMatcher} [matcher] - The function that will be used to
* run the search queries. The default value is the built-in matcher.
*/

/**
Expand All @@ -387,13 +406,22 @@ class PDFFindController {

#visitedPagesCount = 0;

/** @type {PDFFindMatcher} */
#matcher = null;

/**
* @param {PDFFindControllerOptions} options
*/
constructor({ linkService, eventBus, updateMatchesCountOnProgress = true }) {
constructor({
linkService,
eventBus,
updateMatchesCountOnProgress = true,
matcher = this.#defaultFindMatcher,
}) {
this._linkService = linkService;
this._eventBus = eventBus;
this.#updateMatchesCountOnProgress = updateMatchesCountOnProgress;
this.#matcher = matcher;

/**
* Callback used to check if a `pageNumber` is currently visible.
Expand Down Expand Up @@ -670,37 +698,6 @@ class PDFFindController {
return true;
}

#calculateRegExpMatch(query, entireWord, pageIndex, pageContent) {
const matches = (this._pageMatches[pageIndex] = []);
const matchesLength = (this._pageMatchesLength[pageIndex] = []);
if (!query) {
// The query can be empty because some chars like diacritics could have
// been stripped out.
return;
}
const diffs = this._pageDiffs[pageIndex];
let match;
while ((match = query.exec(pageContent)) !== null) {
if (
entireWord &&
!this.#isEntireWord(pageContent, match.index, match[0].length)
) {
continue;
}

const [matchPos, matchLen] = getOriginalIndex(
diffs,
match.index,
match[0].length
);

if (matchLen) {
matches.push(matchPos);
matchesLength.push(matchLen);
}
}
}

#convertToRegExpString(query, hasDiacritics) {
const { matchDiacritics } = this.#state;
let isUnicode = false;
Expand Down Expand Up @@ -771,13 +768,56 @@ class PDFFindController {
return [isUnicode, query];
}

#calculateMatch(pageIndex) {
let query = this.#query;
async #calculateMatch(pageIndex) {
const query = this.#query;
if (query.length === 0) {
return; // Do nothing: the matches should be wiped out already.
}
const { caseSensitive, entireWord } = this.#state;
const pageContent = this._pageContents[pageIndex];

const matcherResult = await this.#matcher(
query,
this._pageContents[pageIndex],
pageIndex
);

const matches = (this._pageMatches[pageIndex] = []);
const matchesLength = (this._pageMatchesLength[pageIndex] = []);
const diffs = this._pageDiffs[pageIndex];

matcherResult?.forEach(({ index, length }) => {
const [matchPos, matchLen] = getOriginalIndex(diffs, index, length);

if (matchLen) {
matches.push(matchPos);
matchesLength.push(matchLen);
}
});

// When `highlightAll` is set, ensure that the matches on previously
// rendered (and still active) pages are correctly highlighted.
if (this.#state.highlightAll) {
this.#updatePage(pageIndex);
}
if (this._resumePageIdx === pageIndex) {
this._resumePageIdx = null;
this.#nextPageMatch();
}

// Update the match count.
const pageMatchesCount = this._pageMatches[pageIndex].length;
this._matchesCountTotal += pageMatchesCount;
if (this.#updateMatchesCountOnProgress) {
if (pageMatchesCount > 0) {
this.#updateUIResultsCount();
}
} else if (++this.#visitedPagesCount === this._linkService.pagesCount) {
// For example, in GeckoView we want to have only the final update because
// the Java side provides only one object to update the counts.
this.#updateUIResultsCount();
}
}

#defaultFindMatcher(query, pageContent, pageIndex) {
const hasDiacritics = this._hasDiacritics[pageIndex];

let isUnicode = false;
Expand All @@ -799,34 +839,25 @@ class PDFFindController {
})
.join("|");
}
if (!query) {
return;
}

const { caseSensitive, entireWord } = this.#state;
const flags = `g${isUnicode ? "u" : ""}${caseSensitive ? "" : "i"}`;
query = query ? new RegExp(query, flags) : null;
query = new RegExp(query, flags);

this.#calculateRegExpMatch(query, entireWord, pageIndex, pageContent);
const matches = [];
for (const { index, 0: text } of pageContent.matchAll(query)) {
const { length } = text;
if (entireWord && !this.#isEntireWord(pageContent, index, length)) {
continue;
}

// When `highlightAll` is set, ensure that the matches on previously
// rendered (and still active) pages are correctly highlighted.
if (this.#state.highlightAll) {
this.#updatePage(pageIndex);
}
if (this._resumePageIdx === pageIndex) {
this._resumePageIdx = null;
this.#nextPageMatch();
matches.push({ index, length });
}

// Update the match count.
const pageMatchesCount = this._pageMatches[pageIndex].length;
this._matchesCountTotal += pageMatchesCount;
if (this.#updateMatchesCountOnProgress) {
if (pageMatchesCount > 0) {
this.#updateUIResultsCount();
}
} else if (++this.#visitedPagesCount === this._linkService.pagesCount) {
// For example, in GeckoView we want to have only the final update because
// the Java side provides only one object to update the counts.
this.#updateUIResultsCount();
}
return matches;

Check failure on line 860 in web/pdf_find_controller.js

View workflow job for this annotation

GitHub Actions / Lint (lts/*)

Private method #defaultFindMatcher expected no return value

Check failure on line 860 in web/pdf_find_controller.js

View workflow job for this annotation

GitHub Actions / Lint (lts/*)

Private method #defaultFindMatcher expected no return value
}

#extractText() {
Expand Down Expand Up @@ -930,10 +961,9 @@ class PDFFindController {
continue;
}
this._pendingFindMatches.add(i);
this._extractTextPromises[i].then(() => {
this._pendingFindMatches.delete(i);
this.#calculateMatch(i);
});
this._extractTextPromises[i]
.then(() => this.#calculateMatch(i))
.finally(() => this._pendingFindMatches.delete(i));
}
}

Expand Down

0 comments on commit a8f7fba

Please sign in to comment.