带有文字选择的pdf.js

时间:2015-10-11 08:58:26

标签: javascript jquery pdf.js

如何使PDF中的文字可选?

在这里尝试过。PDF 能正常渲染,但无法选择文字。

https://github.com/mozilla/pdf.js

https://github.com/mozilla/pdf.js/blob/master/web/text_layer_builder.css
https://github.com/mozilla/pdf.js/blob/master/web/text_layer_builder.js

'use strict';

// Load file.pdf, draw its first page on #the-canvas, and build a text layer
// for that page inside #text-layer.
PDFJS.getDocument('file.pdf').then(function(pdf){
    var page_num = 1;
    pdf.getPage(page_num).then(function(page){
        var scale = 1.5;
        var viewport = page.getViewport(scale);
        var canvas = document.getElementById('the-canvas');
        var context = canvas.getContext('2d');
        // Make the canvas exactly as large as the page viewport at this scale.
        canvas.height = viewport.height;
        canvas.width = viewport.width;

        // Give #text-layer the viewport's size and the canvas's document offset
        // so that it sits on top of the canvas.
        var canvasOffset = $(canvas).offset();
        var $textLayerDiv = $('#text-layer').css({
            height : viewport.height+'px',
            width : viewport.width+'px',
            top : canvasOffset.top,
            left : canvasOffset.left
        });

        // Draw the page onto the canvas.
        page.render({
            canvasContext : context,
            viewport : viewport
        });

        // Once the page's text content is available, pass it to a
        // TextLayerBuilder created for the #text-layer element and render it.
        page.getTextContent().then(function(textContent){
            var textLayer = new TextLayerBuilder({
                textLayerDiv : $textLayerDiv.get(0),
                pageIndex : page_num - 1,
                viewport : viewport
            });

            textLayer.setTextContent(textContent);
            textLayer.render();
        });
    });
});

<body>
  <!-- The script draws the PDF page on #the-canvas and positions #text-layer over it. -->
  <div>
    <canvas id="the-canvas" style="border:1px solid black;"></canvas>
    <div id="text-layer" class="textLayer"></div>
  </div>
</body>

4 个答案:

答案 0 :(得分:13)

您的JavaScript代码非常完美。 您只需要包含Text Layer Builder所依赖的UI实用程序:

https://github.com/mozilla/pdf.js/blob/master/web/ui_utils.js

或在HTML中:

<script src="https://raw.githubusercontent.com/mozilla/pdf.js/master/web/ui_utils.js"></script>

如果您在没有 ui_utils 的情况下运行代码并查看调试控制台(debug console),会看到 ReferenceError: CustomStyle is not defined。在 PDFjs 的仓库(repo)中快速搜索(quick search)一下,就能发现它定义在 ui_utils.js 中。

这是我的最小但完整的代码供您参考。 我正在使用PDFjs的演示pdf here。 请注意,在生产中,您不应链接到raw.github。

<!DOCTYPE html><meta charset="utf-8">
<!-- Text-layer stylesheet, jQuery, the two pdf.js web helpers (ui_utils.js and
     text_layer_builder.js) and pdf.js itself. -->
<link rel="stylesheet" href="https://raw.githubusercontent.com/mozilla/pdf.js/master/web/text_layer_builder.css" />
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.4/jquery.min.js"></script>
<script src="https://raw.githubusercontent.com/mozilla/pdf.js/master/web/ui_utils.js"></script>
<script src="https://raw.githubusercontent.com/mozilla/pdf.js/master/web/text_layer_builder.js"></script>
<script src="https://mozilla.github.io/pdf.js/build/pdf.js"></script>
<body>
  <!-- The script below draws the page on #the-canvas and positions #text-layer over it. -->
  <div>
    <canvas id="the-canvas" style="border:1px solid black;"></canvas>
    <div id="text-layer" class="textLayer"></div>
  </div>
<script>
'use strict';

// Load file.pdf, draw its first page on #the-canvas, and build a text layer
// for that page inside #text-layer.
PDFJS.getDocument('file.pdf').then(function(pdf){
    var page_num = 1;
    pdf.getPage(page_num).then(function(page){
        var scale = 1.5;
        var viewport = page.getViewport(scale);
        var canvas = $('#the-canvas')[0];
        var context = canvas.getContext('2d');
        // Make the canvas exactly as large as the page viewport at this scale.
        canvas.height = viewport.height;
        canvas.width = viewport.width;

        // Give #text-layer the viewport's size and the canvas's document offset
        // so that it sits on top of the canvas.
        var canvasOffset = $(canvas).offset();
        var $textLayerDiv = $('#text-layer').css({
            height : viewport.height+'px',
            width : viewport.width+'px',
            top : canvasOffset.top,
            left : canvasOffset.left
        });

        // Draw the page onto the canvas.
        page.render({
            canvasContext : context,
            viewport : viewport
        });

        // Once the page's text content is available, log it, then pass it to a
        // TextLayerBuilder created for the #text-layer element and render it.
        page.getTextContent().then(function(textContent){
           console.log( textContent );
            var textLayer = new TextLayerBuilder({
                textLayerDiv : $textLayerDiv.get(0),
                pageIndex : page_num - 1,
                viewport : viewport
            });

            textLayer.setTextContent(textContent);
            textLayer.render();
        });
    });
});
</script>

答案 1 :(得分:6)

在 pdf.js 版本 2.8.61 上,被采纳的答案已不再适用,因为 renderTextLayer() 已集成到 pdf.js 中,不再需要外部源文件,也不需要 jQuery。

以下代码将使 PDF 文本可选。它加载以下PDF文档为例,请替换为您自己的:

https://raw.githubusercontent.com/mozilla/pdf.js/ba2edeae/web/compressed.tracemonkey-pldi-09.pdf

主要使用两个html元素:

<canvas id="the-canvas"></canvas>
<div class="textLayer"></div>

canvas 用于显示不可选择的文档,.textLayer div 用于可选文本。 .textLayer div 上的文字都是透明的,所以不可见,只提供选择效果。


<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<meta name="viewport" content="width=device-width,initial-scale=1.0,maximum-scale=1.0,user-scalable=no">
<!-- pdf.js library and the viewer stylesheet, both loaded from mozilla.github.io -->
<script src="//mozilla.github.io/pdf.js/build/pdf.js" crossorigin="anonymous"></script>
<link href="//mozilla.github.io/pdf.js/web/viewer.css" rel="stylesheet" type="text/css" />
<style type="text/css">

#the-canvas {
  border: 1px solid black;
  direction: ltr;
}

</style>
</head>

<body>

<h1>PDF.js Previous/Next example</h1>

<!-- Page navigation buttons and the "current / total" page indicator; the
     script below fills in #page_num and #page_count. -->
<div>
  <button id="prev">Previous</button>
  <button id="next">Next</button>
  &nbsp; &nbsp;
  <span>Page: <span id="page_num"></span> / <span id="page_count"></span></span>
</div>

<!-- The script below draws the page on the canvas and lays .textLayer over it. -->
<canvas id="the-canvas"></canvas>
<div class="textLayer"></div>
<script>
// Location of the PDF to display. When this is an absolute URL on a remote
// server, that server has to be configured to send the CORS header.
var url = '//raw.githubusercontent.com/mozilla/pdf.js/ba2edeae/web/compressed.tracemonkey-pldi-09.pdf';

// pdf.js was loaded through a <script> tag; this is the shortcut to its exports.
var pdfjsLib = window['pdfjs-dist/build/pdf'];

// The worker script location must be set explicitly.
pdfjsLib.GlobalWorkerOptions.workerSrc = '//mozilla.github.io/pdf.js/build/pdf.worker.js';

// Viewer state shared by the rest of this script.
var pdfDoc = null;            // loaded document, null until loading finishes
var pageNum = 1;              // page currently shown (1-based)
var pageRendering = false;    // true while a page render is in progress
var pageNumPending = null;    // page requested while another one was rendering
var scale = 1;                // zoom factor passed to getViewport
var canvas = document.getElementById('the-canvas');
var ctx = canvas.getContext('2d');

/**
 * Get page info from document, resize canvas accordingly, render the page,
 * and rebuild the selectable text layer on top of it.
 *
 * The text layer container is emptied before every render; otherwise the
 * text elements of each newly shown page would be added on top of those of
 * the previously shown pages. The `pageRendering` flag is released only
 * after the text layer has been built, so a queued page cannot start while
 * this page's text is still on its way into the shared container. Failures
 * are logged and also release the flag, so navigation keeps working.
 *
 * @param num Page number.
 */
function renderPage(num) {
  pageRendering = true;

  // Using promise to fetch the page
  pdfDoc.getPage(num).then(function(page) {
    var viewport = page.getViewport({scale: scale});
    canvas.height = viewport.height;
    canvas.width = viewport.width;

    // Render PDF page into canvas context
    var renderTask = page.render({
      canvasContext: ctx,
      viewport: viewport
    });

    // Wait for the canvas rendering to finish, then fetch the page's text.
    return renderTask.promise.then(function() {
      return page.getTextContent();
    }).then(function(textContent) {
      var textLayer = document.querySelector(".textLayer");

      // Drop the text elements left over from the previously shown page.
      textLayer.textContent = '';

      // Align the text layer with the canvas.
      textLayer.style.left = canvas.offsetLeft + 'px';
      textLayer.style.top = canvas.offsetTop + 'px';
      textLayer.style.height = canvas.offsetHeight + 'px';
      textLayer.style.width = canvas.offsetWidth + 'px';

      // Pass the data to the method for rendering of text over the pdf canvas.
      var textLayerTask = pdfjsLib.renderTextLayer({
        textContent: textContent,
        container: textLayer,
        viewport: viewport,
        textDivs: []
      });

      // Wait for the text layer when the returned task exposes a promise.
      return textLayerTask && textLayerTask.promise;
    });
  }).catch(function(reason) {
    // Report the failure but fall through to the release step below, so a
    // failed page does not block all further navigation.
    console.error(reason);
  }).then(function() {
    pageRendering = false;
    if (pageNumPending !== null) {
      // New page rendering is pending
      var queuedPage = pageNumPending;
      pageNumPending = null;
      renderPage(queuedPage);
    }
  });

  // Update page counters
  document.getElementById('page_num').textContent = num;
}

/**
 * Shows page `num` right away when no render is in progress; otherwise
 * remembers it as the pending page to be rendered once the current
 * rendering has finished.
 */
function queueRenderPage(num) {
  if (!pageRendering) {
    renderPage(num);
    return;
  }
  pageNumPending = num;
}

/**
 * Steps back one page and queues it for rendering; does nothing when the
 * first page is already shown.
 */
function onPrevPage() {
  if (pageNum > 1) {
    pageNum -= 1;
    queueRenderPage(pageNum);
  }
}
// Run onPrevPage whenever the element with id "prev" is clicked.
document.getElementById('prev').addEventListener('click', onPrevPage);

/**
 * Displays next page.
 *
 * Does nothing while the document has not been loaded yet (pdfDoc is still
 * null) or when the last page is already shown.
 */
function onNextPage() {
  // pdfDoc stays null until the download finishes; a click before that must
  // not try to read numPages from it.
  if (!pdfDoc || pageNum >= pdfDoc.numPages) {
    return;
  }
  pageNum++;
  queueRenderPage(pageNum);
}
// Run onNextPage whenever the element with id "next" is clicked.
document.getElementById('next').addEventListener('click', onNextPage);

/**
 * Starts the asynchronous download of the PDF at `url`. When the document
 * is available it is stored in `pdfDoc`, the total page count is shown in
 * #page_count, and the current page is rendered for the first time.
 */
pdfjsLib.getDocument(url).promise.then(function onDocumentLoaded(loadedDoc) {
  var pageCountLabel = document.getElementById('page_count');

  pdfDoc = loadedDoc;
  pageCountLabel.textContent = pdfDoc.numPages;

  // First render of the initially selected page.
  renderPage(pageNum);
});

</script>

</body>
</html>

答案 2 :(得分:3)

经过几个小时的努力,我发现这篇文章对于实现文本选择以及在不使用 Node 的情况下使用 pdf.js 非常有帮助:Custom PDF Rendering in JavaScript with Mozilla’s PDF.Js

答案 3 :(得分:-1)

您好,您已在HTML内容中创建了画布。

Canvas不支持文本选择,因此您需要将画布更改为其他方式。