Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

bump pdfbox to 3.0.1 #79

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions deps.edn
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
{:deps {org.clojure/clojure {:mvn/version "1.11.1"},
org.apache.pdfbox/pdfbox {:mvn/version "2.0.29"}
org.apache.pdfbox/pdfbox-tools {:mvn/version "2.0.29"}}
org.apache.pdfbox/pdfbox {:mvn/version "3.0.1"}
org.apache.pdfbox/pdfbox-io {:mvn/version "3.0.1"}
org.apache.pdfbox/pdfbox-tools {:mvn/version "3.0.1"}}
:aliases {:test {:extra-paths ["test"]
:extra-deps {org.clojure/test.check {:mvn/version "RELEASE"}}}
:runner {:extra-deps {com.cognitect/test-runner
Expand Down
5 changes: 3 additions & 2 deletions project.clj
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,6 @@
:license {:name "BSD"
:url "https://opensource.org/license/bsd-3-clause/"}
:dependencies [[org.clojure/clojure "1.11.1"]
[org.apache.pdfbox/pdfbox "2.0.29"]
[org.apache.pdfbox/pdfbox-tools "2.0.29"]])
[org.apache.pdfbox/pdfbox "3.0.1"]
[org.apache.pdfbox/pdfbox-io "3.0.1"]
[org.apache.pdfbox/pdfbox-tools "3.0.1"]])
49 changes: 27 additions & 22 deletions src/pdfboxing/common.clj
Original file line number Diff line number Diff line change
@@ -1,26 +1,35 @@
(ns pdfboxing.common
(:require [clojure.java.io :as io])
(:import (java.io File)
(org.apache.pdfbox.pdmodel PDDocument)
(org.apache.pdfbox.io RandomAccessFile)
(org.apache.pdfbox.pdfparser PDFParser)))
(org.apache.pdfbox Loader)
(org.apache.pdfbox.io RandomAccessReadBufferedFile)))


(defn load-pdf-from-media
  "Load the PDF located at `pdf-file-or-path`.
  The argument is coerced to a java.io.File with `clojure.java.io/as-file`
  and handed to `Loader/loadPDF`.
  Returns the loaded document, or nil if any Exception is thrown while
  coercing or loading."
  [pdf-file-or-path]
  (try
    (let [^File pdf-file (io/as-file pdf-file-or-path)]
      (Loader/loadPDF pdf-file))
    (catch Exception _
      ;; Best-effort load: every failure is reported to the caller as nil.
      nil)))

(defn load-pdf-from-bytes
  "Load a PDF from the byte array `pdf-bytes` via `Loader/loadPDF` and
  return the resulting document.
  Exceptions raised by the loader are not caught here."
  [^bytes pdf-bytes]
  (let [document (Loader/loadPDF pdf-bytes)]
    document))

(defn try-get-as-pdf
"Try and get the pdf-file-or-path as a PDF.
Returns nil if pdf-file-or-path could not be loaded as a PDF."
[pdf-file-or-path]
(let [^File pdf-file (io/as-file pdf-file-or-path)
random-access-file (RandomAccessFile. pdf-file "r")
parser (PDFParser. random-access-file)]
(try
(.parse parser)
(.getPDDocument parser)
(catch Exception _))))
(try
(-> pdf-file-or-path
^File (io/as-file)
(RandomAccessReadBufferedFile.)
(Loader/loadPDF))
(catch Exception _)))

(defn is-pdf?
"Confirm that the PDF supplied is really a PDF"
[pdf-file-or-path]
(if-let [pdf (try-get-as-pdf pdf-file-or-path)]
(if-let [pdf (load-pdf-from-media pdf-file-or-path)]
(try
(not (nil? pdf))
(finally
Expand All @@ -29,31 +38,27 @@

(defn load-pdf
"Load a given PDF only after checking if it really is a PDF"
[pdf-file-or-path]
(if-let [pdf (try-get-as-pdf pdf-file-or-path)]
[bytes-pdf-file-or-path]
(if-let [pdf (try-get-as-pdf bytes-pdf-file-or-path)]
pdf
(throw (IllegalArgumentException. (format "%s is not a PDF file" pdf-file-or-path)))))
(throw (IllegalArgumentException. (format "%s is not a PDF file" bytes-pdf-file-or-path)))))

;; Protocol for values that can be turned into a document object.
;; Concrete implementations are attached per source type with extend-protocol.
(defprotocol PDFDocument
"return an object from which text can be extracted"
;; `source` is the value to obtain the document from; dispatch is on its type.
(obtain-document [source]))

(extend-protocol PDFDocument
(Class/forName "[B") ;; byte-array
(Class/forName "[B") ;; byte-array
(obtain-document [source]
(PDDocument/load source))
(load-pdf-from-bytes source))

String
(obtain-document [source]
(load-pdf source))
(load-pdf-from-media source))

File
(obtain-document [source]
(load-pdf source))

PDDocument
(obtain-document [source]
source))
(load-pdf-from-media source)))

(defn get-form
"Obtain AcroForm from a open `doc`, opened with obtain-document"
Expand Down
22 changes: 15 additions & 7 deletions src/pdfboxing/merge.clj
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
(ns pdfboxing.merge
(:require [pdfboxing.common :as common])
(:import (java.io InputStream OutputStream)
(:import (java.io File InputStream OutputStream)
(org.apache.pdfbox.io IOUtils RandomAccessRead RandomAccessReadBuffer)
(org.apache.pdfbox.multipdf PDFMergerUtility)
(org.apache.pdfbox.pdmodel PDDocument
PDPage
PDPageContentStream)
PDPage PDPageContentStream)
(org.apache.pdfbox.pdmodel.common PDRectangle)
(org.apache.pdfbox.pdmodel.graphics.image PDImageXObject)))

(defn throw-exception
[message]
[^String message]
(throw (IllegalArgumentException. message)))

(defn check-if-present
Expand Down Expand Up @@ -43,18 +43,26 @@
{:pre [(arg-check output input)]}
(let [merger (PDFMergerUtility.)]
(doseq [source input]
(.addSource merger source))
(condp instance? source
File (.addSource merger ^File source)
String (.addSource merger ^String source)
InputStream (.addSource merger ^RandomAccessRead
(RandomAccessReadBuffer. ^InputStream source))))
(cond
(instance? OutputStream output)
(.setDestinationStream merger output)

:else
(.setDestinationFileName merger output))
(.mergeDocuments merger)))
(.mergeDocuments merger (IOUtils/createMemoryOnlyStreamCache))
;; Close the destination only when the caller handed us an OutputStream.
;; java.io.File has no `close` method, so the former File branch could only
;; fail with a reflection error; file-name destinations need no closing here.
(when (instance? OutputStream output)
(.close ^OutputStream output))))

(defn- add-image-to-page
"Adds image as a page to the document object"
[doc ^org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject image]
[^PDDocument doc ^PDImageXObject image]
(let [page-size PDRectangle/A4
original-width (.getWidth image)
original-height (.getHeight image)
Expand Down
Loading