mirror of
https://github.com/mozilla/pdf.js.git
synced 2025-04-20 15:18:08 +02:00
Merge pull request #16286 from calixteman/copy_all
Add the possibility to copy all the pdf text whatever the rendered pages are (bug 1788035)
This commit is contained in:
commit
92baf14531
10 changed files with 253 additions and 3 deletions
|
@ -504,6 +504,7 @@ const PDFViewerApplication = {
|
|||
this.pdfViewer = new PDFViewer({
|
||||
container,
|
||||
viewer,
|
||||
hiddenCopyElement: appConfig.hiddenCopyElement,
|
||||
eventBus,
|
||||
renderingQueue: pdfRenderingQueue,
|
||||
linkService: pdfLinkService,
|
||||
|
|
|
@ -45,6 +45,15 @@
|
|||
transform: rotate(270deg) translateX(-100%);
|
||||
}
|
||||
|
||||
/* Marker element for the "copy all" feature: it is zero-sized and never
   displayed, so it cannot be selected by hand. The viewer only checks whether
   it is part of the current selection (e.g. after Ctrl+A) when a copy occurs. */
#hiddenCopyElement {
|
||||
position: absolute;
|
||||
top: 0;
|
||||
left: 0;
|
||||
width: 0;
|
||||
height: 0;
|
||||
display: none;
|
||||
}
|
||||
|
||||
.pdfViewer {
|
||||
/* Define this variable here and not in :root to avoid reflowing the whole UI
|
||||
when scaling (see #15929). */
|
||||
|
|
|
@ -82,6 +82,8 @@ function isValidAnnotationEditorMode(mode) {
|
|||
* @typedef {Object} PDFViewerOptions
|
||||
* @property {HTMLDivElement} container - The container for the viewer element.
|
||||
* @property {HTMLDivElement} [viewer] - The viewer element.
|
||||
* @property {HTMLDivElement} [hiddenCopyElement] - The hidden element used to
|
||||
* check whether the whole document is selected (e.g. with Ctrl+A).
|
||||
* @property {EventBus} eventBus - The application event bus.
|
||||
* @property {IPDFLinkService} linkService - The navigation/linking service.
|
||||
* @property {IDownloadManager} [downloadManager] - The download manager
|
||||
|
@ -205,8 +207,16 @@ class PDFViewer {
|
|||
|
||||
#containerTopLeft = null;
|
||||
|
||||
#copyCallbackBound = this.#copyCallback.bind(this);
|
||||
|
||||
#enablePermissions = false;
|
||||
|
||||
#getAllTextInProgress = false;
|
||||
|
||||
#hiddenCopyElement = null;
|
||||
|
||||
#interruptCopyCondition = false;
|
||||
|
||||
#previousContainerHeight = 0;
|
||||
|
||||
#resizeObserver = new ResizeObserver(this.#resizeObserverCallback.bind(this));
|
||||
|
@ -230,6 +240,7 @@ class PDFViewer {
|
|||
}
|
||||
this.container = options.container;
|
||||
this.viewer = options.viewer || options.container.firstElementChild;
|
||||
this.#hiddenCopyElement = options.hiddenCopyElement;
|
||||
|
||||
if (
|
||||
typeof PDFJSDev === "undefined" ||
|
||||
|
@ -638,6 +649,89 @@ class PDFViewer {
|
|||
]);
|
||||
}
|
||||
|
||||
async getAllText() {
|
||||
const texts = [];
|
||||
const buffer = [];
|
||||
for (
|
||||
let pageNum = 1, pagesCount = this.pdfDocument.numPages;
|
||||
pageNum <= pagesCount;
|
||||
++pageNum
|
||||
) {
|
||||
if (this.#interruptCopyCondition) {
|
||||
return null;
|
||||
}
|
||||
buffer.length = 0;
|
||||
const page = await this.pdfDocument.getPage(pageNum);
|
||||
const { items } = await page.getTextContent();
|
||||
for (const item of items) {
|
||||
if (item.str) {
|
||||
buffer.push(item.str);
|
||||
}
|
||||
if (item.hasEOL) {
|
||||
buffer.push("\n");
|
||||
}
|
||||
}
|
||||
texts.push(buffer.join(""));
|
||||
}
|
||||
|
||||
return texts.join("\n");
|
||||
}
|
||||
|
||||
#copyCallback(event) {
|
||||
const selection = document.getSelection();
|
||||
const { focusNode, anchorNode } = selection;
|
||||
if (
|
||||
anchorNode &&
|
||||
focusNode &&
|
||||
selection.containsNode(this.#hiddenCopyElement)
|
||||
) {
|
||||
// About the condition above:
|
||||
// - having non-null anchorNode and focusNode are here to guaranty that
|
||||
// we have at least a kind of selection.
|
||||
// - this.#hiddenCopyElement is an invisible element which is impossible
|
||||
// to select manually (its display is none) but ctrl+A will select all
|
||||
// including this element so having it in the selection means that all
|
||||
// has been selected.
|
||||
|
||||
// TODO: if all the pages are rendered we don't need to wait for
|
||||
// getAllText and we could just get text from the Selection object.
|
||||
|
||||
if (this.#getAllTextInProgress) {
|
||||
return;
|
||||
}
|
||||
this.#getAllTextInProgress = true;
|
||||
|
||||
// Select all the document.
|
||||
const savedCursor = this.container.style.cursor;
|
||||
this.container.style.cursor = "wait";
|
||||
|
||||
const interruptCopy = ev =>
|
||||
(this.#interruptCopyCondition = ev.key === "Escape");
|
||||
window.addEventListener("keydown", interruptCopy);
|
||||
|
||||
this.getAllText()
|
||||
.then(async text => {
|
||||
if (text !== null) {
|
||||
await navigator.clipboard.writeText(text);
|
||||
}
|
||||
})
|
||||
.catch(reason => {
|
||||
console.warn(
|
||||
`Something goes wrong when extracting the text: ${reason.message}`
|
||||
);
|
||||
})
|
||||
.finally(() => {
|
||||
this.#getAllTextInProgress = false;
|
||||
this.#interruptCopyCondition = false;
|
||||
window.removeEventListener("keydown", interruptCopy);
|
||||
this.container.style.cursor = savedCursor;
|
||||
});
|
||||
|
||||
event.preventDefault();
|
||||
event.stopPropagation();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {PDFDocumentProxy} pdfDocument
|
||||
*/
|
||||
|
@ -805,6 +899,10 @@ class PDFViewer {
|
|||
this.findController?.setDocument(pdfDocument); // Enable searching.
|
||||
this._scriptingManager?.setDocument(pdfDocument); // Enable scripting.
|
||||
|
||||
if (this.#hiddenCopyElement) {
|
||||
document.addEventListener("copy", this.#copyCallbackBound);
|
||||
}
|
||||
|
||||
if (this.#annotationEditorUIManager) {
|
||||
// Ensure that the Editor buttons, in the toolbar, are updated.
|
||||
this.eventBus.dispatch("annotationeditormodechanged", {
|
||||
|
@ -949,6 +1047,8 @@ class PDFViewer {
|
|||
this.viewer.removeAttribute("lang");
|
||||
// Reset all PDF document permissions.
|
||||
this.viewer.classList.remove(ENABLE_PERMISSIONS_CLASS);
|
||||
|
||||
document.removeEventListener("copy", this.#copyCallbackBound);
|
||||
}
|
||||
|
||||
#ensurePageViewVisible() {
|
||||
|
|
|
@ -82,6 +82,7 @@ See https://github.com/adobe-type-tools/cmap-resources
|
|||
<div id="mainContainer">
|
||||
|
||||
<div id="viewerContainer" tabindex="0">
|
||||
<div id="hiddenCopyElement"></div>
|
||||
<div id="viewer" class="pdfViewer"></div>
|
||||
</div>
|
||||
</div> <!-- mainContainer -->
|
||||
|
|
|
@ -41,6 +41,7 @@ function getViewerConfiguration() {
|
|||
appContainer: document.body,
|
||||
mainContainer,
|
||||
viewerContainer: document.getElementById("viewer"),
|
||||
hiddenCopyElement: document.getElementById("hiddenCopyElement"),
|
||||
toolbar: {
|
||||
mainContainer,
|
||||
container: document.getElementById("floatingToolbar"),
|
||||
|
|
|
@ -385,6 +385,7 @@ See https://github.com/adobe-type-tools/cmap-resources
|
|||
</div>
|
||||
|
||||
<div id="viewerContainer" tabindex="0">
|
||||
<div id="hiddenCopyElement"></div>
|
||||
<div id="viewer" class="pdfViewer"></div>
|
||||
</div>
|
||||
</div> <!-- mainContainer -->
|
||||
|
|
|
@ -41,6 +41,7 @@ function getViewerConfiguration() {
|
|||
appContainer: document.body,
|
||||
mainContainer: document.getElementById("viewerContainer"),
|
||||
viewerContainer: document.getElementById("viewer"),
|
||||
hiddenCopyElement: document.getElementById("hiddenCopyElement"),
|
||||
toolbar: {
|
||||
container: document.getElementById("toolbarViewer"),
|
||||
numPages: document.getElementById("numPages"),
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue