1
0
Fork 0
mirror of https://github.com/mozilla/pdf.js.git synced 2025-04-20 15:18:08 +02:00

Merge pull request #16286 from calixteman/copy_all

Add the possibility to copy all the pdf text whatever the rendered pages are (bug 1788035)
This commit is contained in:
calixteman 2023-04-15 19:43:13 +02:00 committed by GitHub
commit 92baf14531
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 253 additions and 3 deletions

View file

@ -504,6 +504,7 @@ const PDFViewerApplication = {
this.pdfViewer = new PDFViewer({
container,
viewer,
hiddenCopyElement: appConfig.hiddenCopyElement,
eventBus,
renderingQueue: pdfRenderingQueue,
linkService: pdfLinkService,

View file

@ -45,6 +45,15 @@
transform: rotate(270deg) translateX(-100%);
}
/* Zero-sized, non-displayed marker element. The viewer's "copy" handler checks
   whether this element is contained in the current selection in order to
   detect that everything has been selected. */
#hiddenCopyElement {
position: absolute;
top: 0;
left: 0;
width: 0;
height: 0;
display: none;
}
.pdfViewer {
/* Define this variable here and not in :root to avoid to reflow all the UI
when scaling (see #15929). */

View file

@ -82,6 +82,8 @@ function isValidAnnotationEditorMode(mode) {
* @typedef {Object} PDFViewerOptions
* @property {HTMLDivElement} container - The container for the viewer element.
* @property {HTMLDivElement} [viewer] - The viewer element.
* @property {HTMLDivElement} [hiddenCopyElement] - The hidden element used to
* detect whether the whole document is selected (e.g. after a "select all").
* @property {EventBus} eventBus - The application event bus.
* @property {IPDFLinkService} linkService - The navigation/linking service.
* @property {IDownloadManager} [downloadManager] - The download manager
@ -205,8 +207,16 @@ class PDFViewer {
#containerTopLeft = null;
#copyCallbackBound = this.#copyCallback.bind(this);
#enablePermissions = false;
#getAllTextInProgress = false;
#hiddenCopyElement = null;
#interruptCopyCondition = false;
#previousContainerHeight = 0;
#resizeObserver = new ResizeObserver(this.#resizeObserverCallback.bind(this));
@ -230,6 +240,7 @@ class PDFViewer {
}
this.container = options.container;
this.viewer = options.viewer || options.container.firstElementChild;
this.#hiddenCopyElement = options.hiddenCopyElement;
if (
typeof PDFJSDev === "undefined" ||
@ -638,6 +649,89 @@ class PDFViewer {
]);
}
async getAllText() {
const texts = [];
const buffer = [];
for (
let pageNum = 1, pagesCount = this.pdfDocument.numPages;
pageNum <= pagesCount;
++pageNum
) {
if (this.#interruptCopyCondition) {
return null;
}
buffer.length = 0;
const page = await this.pdfDocument.getPage(pageNum);
const { items } = await page.getTextContent();
for (const item of items) {
if (item.str) {
buffer.push(item.str);
}
if (item.hasEOL) {
buffer.push("\n");
}
}
texts.push(buffer.join(""));
}
return texts.join("\n");
}
#copyCallback(event) {
const selection = document.getSelection();
const { focusNode, anchorNode } = selection;
if (
anchorNode &&
focusNode &&
selection.containsNode(this.#hiddenCopyElement)
) {
// About the condition above:
// - having non-null anchorNode and focusNode are here to guaranty that
// we have at least a kind of selection.
// - this.#hiddenCopyElement is an invisible element which is impossible
// to select manually (its display is none) but ctrl+A will select all
// including this element so having it in the selection means that all
// has been selected.
// TODO: if all the pages are rendered we don't need to wait for
// getAllText and we could just get text from the Selection object.
if (this.#getAllTextInProgress) {
return;
}
this.#getAllTextInProgress = true;
// Select all the document.
const savedCursor = this.container.style.cursor;
this.container.style.cursor = "wait";
const interruptCopy = ev =>
(this.#interruptCopyCondition = ev.key === "Escape");
window.addEventListener("keydown", interruptCopy);
this.getAllText()
.then(async text => {
if (text !== null) {
await navigator.clipboard.writeText(text);
}
})
.catch(reason => {
console.warn(
`Something goes wrong when extracting the text: ${reason.message}`
);
})
.finally(() => {
this.#getAllTextInProgress = false;
this.#interruptCopyCondition = false;
window.removeEventListener("keydown", interruptCopy);
this.container.style.cursor = savedCursor;
});
event.preventDefault();
event.stopPropagation();
}
}
/**
* @param {PDFDocumentProxy} pdfDocument
*/
@ -805,6 +899,10 @@ class PDFViewer {
this.findController?.setDocument(pdfDocument); // Enable searching.
this._scriptingManager?.setDocument(pdfDocument); // Enable scripting.
if (this.#hiddenCopyElement) {
document.addEventListener("copy", this.#copyCallbackBound);
}
if (this.#annotationEditorUIManager) {
// Ensure that the Editor buttons, in the toolbar, are updated.
this.eventBus.dispatch("annotationeditormodechanged", {
@ -949,6 +1047,8 @@ class PDFViewer {
this.viewer.removeAttribute("lang");
// Reset all PDF document permissions.
this.viewer.classList.remove(ENABLE_PERMISSIONS_CLASS);
document.removeEventListener("copy", this.#copyCallbackBound);
}
#ensurePageViewVisible() {

View file

@ -82,6 +82,7 @@ See https://github.com/adobe-type-tools/cmap-resources
<div id="mainContainer">
<div id="viewerContainer" tabindex="0">
<div id="hiddenCopyElement"></div>
<div id="viewer" class="pdfViewer"></div>
</div>
</div> <!-- mainContainer -->

View file

@ -41,6 +41,7 @@ function getViewerConfiguration() {
appContainer: document.body,
mainContainer,
viewerContainer: document.getElementById("viewer"),
hiddenCopyElement: document.getElementById("hiddenCopyElement"),
toolbar: {
mainContainer,
container: document.getElementById("floatingToolbar"),

View file

@ -385,6 +385,7 @@ See https://github.com/adobe-type-tools/cmap-resources
</div>
<div id="viewerContainer" tabindex="0">
<div id="hiddenCopyElement"></div>
<div id="viewer" class="pdfViewer"></div>
</div>
</div> <!-- mainContainer -->

View file

@ -41,6 +41,7 @@ function getViewerConfiguration() {
appContainer: document.body,
mainContainer: document.getElementById("viewerContainer"),
viewerContainer: document.getElementById("viewer"),
hiddenCopyElement: document.getElementById("hiddenCopyElement"),
toolbar: {
container: document.getElementById("toolbarViewer"),
numPages: document.getElementById("numPages"),