From 2166c1a14f9629aa49a5f3bb849ce878df4c4892 Mon Sep 17 00:00:00 2001
From: yangbin <yangbin@qq.com>
Date: 星期三, 28 八月 2024 17:23:25 +0800
Subject: [PATCH] 2

---
 lxzn-module-ai/src/main/java/org/jeecg/modules/utils/PdfUtils.java |  263 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 263 insertions(+), 0 deletions(-)

diff --git a/lxzn-module-ai/src/main/java/org/jeecg/modules/utils/PdfUtils.java b/lxzn-module-ai/src/main/java/org/jeecg/modules/utils/PdfUtils.java
new file mode 100644
index 0000000..c0218e9
--- /dev/null
+++ b/lxzn-module-ai/src/main/java/org/jeecg/modules/utils/PdfUtils.java
@@ -0,0 +1,263 @@
+package org.jeecg.modules.utils;
+
+import com.itextpdf.text.BaseColor;
+import com.itextpdf.text.DocumentException;
+import com.itextpdf.text.pdf.PdfContentByte;
+import com.itextpdf.text.pdf.PdfReader;
+import com.itextpdf.text.pdf.PdfStamper;
+import com.itextpdf.text.pdf.parser.PdfTextExtractor;
+import org.jeecg.modules.ai.vo.ItemPosition;
+import org.jeecg.modules.ai.vo.KeyWordPosition;
+import org.jeecg.modules.ai.vo.KeyWordPositionVo;
+import org.jeecg.modules.ai.vo.MyTextExtractionStrategy;
+
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * @author clown
+ * @date 2024/7/23
+ */
+public class PdfUtils {
+    /**
+     * Opens the PDF at {@code filepath} and returns the position of every occurrence of {@code keyWords}.
+     * @param filepath path of the PDF file to scan
+     * @param keyWords keyword text to look for
+     * @return the matches from {@code getKeyWords}, or {@code null} when the PDF cannot be opened
+     */
+    public static List<KeyWordPosition> getKeyWordsByPath(String filepath, String keyWords) {
+        try {
+            PdfReader pdfReader = new PdfReader(filepath);
+            // Close the reader on every path so the file it opened is always released.
+            try { return getKeyWords(pdfReader, keyWords); } finally { pdfReader.close(); }
+        } catch (IOException e) {
+            e.printStackTrace();
+            return null;
+        }
+    }
+
+    /**
+     * Collects the position of every occurrence of {@code keyWords}, scanning the PDF one page at a time.
+     * An occurrence may span several consecutive extracted items but is never matched across pages.
+     * @param pdfReader open reader for the document; this method does not close it
+     * @param keyWords keyword text to look for; {@code null} or empty yields no matches
+     * @return the matches in scan order; if extraction throws, the matches collected up to that point
+     */
+    private static List<KeyWordPosition> getKeyWords(PdfReader pdfReader, String keyWords) {
+        List<KeyWordPosition> matchItems = new ArrayList<>();
+        if (keyWords == null || keyWords.isEmpty()) {
+            return matchItems;
+        }
+        try {
+            int pageNum = pdfReader.getNumberOfPages();
+            for (int page = 1; page <= pageNum; page++) {
+                // Holds the items of the current page only; hoist it out of the loop to keep all pages.
+                List<ItemPosition> allItems = new ArrayList<>();
+                // Presumably the strategy appends each extracted item to allItems - confirm in MyTextExtractionStrategy.
+                PdfTextExtractor.getTextFromPage(pdfReader, page, new MyTextExtractionStrategy(allItems, page));
+                // Text of the items buffered since the page start or since the previous match.
+                StringBuilder allText = new StringBuilder();
+                // starts[k] is the offset in allText at which item k begins; segStart is the first buffered item.
+                int[] starts = new int[allItems.size()];
+                int segStart = 0;
+                for (int i = 0; i < allItems.size(); i++) {
+                    starts[i] = allText.length();
+                    allText.append(allItems.get(i).getText());
+                    // Earlier prefixes held no match, so a new one must end inside the item just appended.
+                    int hit = allText.indexOf(keyWords, Math.max(0, starts[i] - keyWords.length() + 1));
+                    if (hit == -1) {
+                        continue;
+                    }
+                    // Walk back to the item holding the first matched character. Stepping back a fixed
+                    // keyWords.length() items is only right for one-character items and can index before 0.
+                    int first = i;
+                    while (first > segStart && starts[first] > hit) {
+                        first--;
+                    }
+                    KeyWordPosition keyWordPosition = new KeyWordPosition();
+                    keyWordPosition.setListItem(new ArrayList<ItemPosition>(allItems.subList(first, i + 1)));
+                    keyWordPosition.setText(keyWords);
+                    matchItems.add(keyWordPosition);
+                    // Drop the buffered text so the next occurrence is matched from scratch.
+                    allText.setLength(0);
+                    segStart = i + 1;
+                }
+            }
+        } catch (Exception e) {
+            e.printStackTrace();
+        }
+        return matchItems;
+    }
+
+    /**
+     * Ad-hoc driver: searches a hard-coded PDF for a hard-coded keyword, prints timings and writes a marked copy.
+     */
+    public static void main(String[] args) throws Exception {
+        String keyword = "一旦发生机床安全事故";
+        String sourcePdf = "F:\\123.pdf";
+        String watermarkPdf = "F:\\12_bak.pdf";
+        long start = System.currentTimeMillis();
+        System.out.println("开始扫描....");
+        List<KeyWordPosition> matchItems = getKeyWordsByPath(sourcePdf, keyword);
+        if (matchItems == null) {
+            // getKeyWordsByPath returns null when the source PDF cannot be opened; there is nothing to mark.
+            return;
+        }
+        System.out.println(matchItems);
+        System.out.println("扫描结束["+(System.currentTimeMillis()-start)+"ms]，共找到关键字["+keyword+"]出现["+matchItems.size()+"]次");
+        start = System.currentTimeMillis();
+        System.out.println("开始添加标记....");
+        andRectangleMark(sourcePdf, watermarkPdf, matchItems, BaseColor.RED, 2, 2);
+        System.out.println("标记添加完成["+(System.currentTimeMillis()-start)+"ms]");
+    }
+
+
+    /** Prints a progress line, then returns the result of {@code getKeyWordsByPathOne}, or {@code null} if it throws. */
+    public static List<KeyWordPositionVo> findListWord(String sourcePath, String keyWord){
+        System.out.println("开始扫描....");
+        try {
+            return getKeyWordsByPathOne(sourcePath, keyWord);
+        } catch (Exception e) {
+            e.printStackTrace(); // surface the failure like the sibling methods instead of hiding it
+            return null;
+        }
+    }
+    /**
+     * Opens the PDF at {@code filepath} and returns, per page, the first position of {@code keyWords}.
+     * @param filepath path of the PDF file to scan
+     * @param keyWords keyword text to look for
+     * @return the entries from {@code getKeyWordPage}, or {@code null} when the PDF cannot be opened
+     */
+    public static List<KeyWordPositionVo> getKeyWordsByPathOne(String filepath, String keyWords) {
+        try {
+            PdfReader pdfReader = new PdfReader(filepath);
+            // Close the reader on every path so the file it opened is always released.
+            try { return getKeyWordPage(pdfReader, keyWords); } finally { pdfReader.close(); }
+        } catch (IOException e) {
+            e.printStackTrace();
+            return null;
+        }
+    }
+
+    /**
+     * Records, per page, the first occurrence of {@code keyWords} when it lies inside a single extracted item.
+     * Scanning of a page stops at its first occurrence; if that occurrence is spread over several
+     * items, nothing is recorded for the page.
+     * @param pdfReader open reader for the document; this method does not close it
+     * @param keyWords keyword text to look for; {@code null} or empty yields no matches
+     * @return at most one entry per page; if extraction throws, the entries collected up to that point
+     */
+    private static List<KeyWordPositionVo> getKeyWordPage(PdfReader pdfReader, String keyWords) {
+        List<KeyWordPositionVo> matchItems = new ArrayList<>();
+        if (keyWords == null || keyWords.isEmpty()) {
+            return matchItems;
+        }
+        try {
+            int pageNum = pdfReader.getNumberOfPages();
+            for (int page = 1; page <= pageNum; page++) {
+                // Holds the items of the current page only.
+                List<ItemPosition> allItems = new ArrayList<>();
+                // Presumably the strategy appends each extracted item to allItems - confirm in MyTextExtractionStrategy.
+                PdfTextExtractor.getTextFromPage(pdfReader, page, new MyTextExtractionStrategy(allItems, page));
+                // Page text accumulated so far, used to detect the first occurrence.
+                StringBuilder allText = new StringBuilder();
+                for (ItemPosition item : allItems) {
+                    int begin = allText.length();
+                    allText.append(item.getText());
+                    // No earlier prefix matched, so only the tail that can overlap the new item is searched.
+                    if (allText.indexOf(keyWords, Math.max(0, begin - keyWords.length() + 1)) == -1) {
+                        continue;
+                    }
+                    // An earlier item holding the whole keyword would already have ended this loop, so
+                    // only the current item can qualify. Looking back keyWords.length() items instead
+                    // would index before 0 whenever the hit comes early on the page.
+                    if (item.getText().indexOf(keyWords) != -1) {
+                        KeyWordPositionVo vo = new KeyWordPositionVo();
+                        vo.setPage(item.getPage());
+                        vo.setRectangle(item.getRectangle());
+                        vo.setText(item.getText());
+                        vo.setRectangleString(item.getRectangle().toString());
+                        matchItems.add(vo);
+                    }
+                    // Only the first occurrence on a page is considered.
+                    break;
+                }
+            }
+        } catch (Exception e) {
+            e.printStackTrace();
+        }
+        return matchItems;
+    }
+
+    /**
+     * Writes a copy of the PDF at {@code oldPath} to {@code newPath} with an outline stroked around each match.
+     * For every item of a match the bottom and top edges are drawn; the left edge is drawn for the first
+     * item only and the right edge for the last item only, so the items of a match read as one box.
+     * @param oldPath path of the source PDF
+     * @param newPath path of the PDF to write
+     * @param matchItems keyword matches to outline; must not be {@code null}
+     * @param color stroke colour
+     * @param lineWidth stroke width
+     * @param padding distance between an item's rectangle and the stroked outline
+     * @throws DocumentException if the stamper cannot be created or closed
+     * @throws IOException if the source cannot be read or the target cannot be written
+     */
+    public static void andRectangleMark(String oldPath, String newPath, List<KeyWordPosition> matchItems, BaseColor color, int lineWidth, int padding) throws DocumentException, IOException{
+        if (matchItems == null) {
+            // Fail before any file is opened so that no empty output file is left behind.
+            throw new NullPointerException("matchItems");
+        }
+        PdfReader reader = new PdfReader(oldPath);
+        // The stream and the reader are released even when stamping fails part-way.
+        try (FileOutputStream out = new FileOutputStream(newPath)) {
+            PdfStamper stamper = new PdfStamper(reader, out);
+            for (KeyWordPosition keyWordPosition : matchItems) {
+                // Positions of all items that make up one keyword occurrence.
+                List<ItemPosition> oneKeywordItems = keyWordPosition.getListItem();
+                for (int i = 0; i < oneKeywordItems.size(); i++) {
+                    ItemPosition item = oneKeywordItems.get(i);
+                    ItemPosition preItem = i == 0 ? null : oneKeywordItems.get(i - 1);
+                    // Draw on top of the existing content of the item's page.
+                    PdfContentByte content = stamper.getOverContent(item.getPage());
+                    content.setLineWidth(lineWidth);
+                    content.setColorStroke(color);
+                    System.out.println(item.toString());
+
+                    // Bottom edge, plus a bridge over any gap to the previous item with the same bottom.
+                    content.moveTo(item.getRectangle().getLeft()-padding, item.getRectangle().getBottom()-padding);
+                    content.lineTo(item.getRectangle().getRight()+padding, item.getRectangle().getBottom()-padding);
+                    if(preItem!=null && (preItem.getRectangle().getBottom()-padding)==(item.getRectangle().getBottom()-padding) && (preItem.getRectangle().getRight()+padding)!=(item.getRectangle().getLeft()-padding)) {
+                        content.moveTo(preItem.getRectangle().getRight()+padding, preItem.getRectangle().getBottom()-padding);
+                        content.lineTo(item.getRectangle().getLeft()-padding, item.getRectangle().getBottom()-padding);
+                    }
+                    // Top edge, bridged the same way to the previous item with the same top.
+                    content.moveTo(item.getRectangle().getLeft()-padding, item.getRectangle().getTop()+padding);
+                    content.lineTo(item.getRectangle().getRight()+padding, item.getRectangle().getTop()+padding);
+                    if(preItem!=null && (preItem.getRectangle().getTop()+padding)==(item.getRectangle().getTop()+padding) && (preItem.getRectangle().getRight()+padding)!=(item.getRectangle().getLeft()-padding)) {
+                        content.moveTo(preItem.getRectangle().getRight()+padding, preItem.getRectangle().getTop()+padding);
+                        content.lineTo(item.getRectangle().getLeft()-padding, item.getRectangle().getTop()+padding);
+                    }
+
+                    // Left edge: first item only.
+                    if(i==0) {
+                        content.moveTo(item.getRectangle().getLeft()-padding, item.getRectangle().getBottom()-padding);
+                        content.lineTo(item.getRectangle().getLeft()-padding, item.getRectangle().getTop()+padding);
+                    }
+                    // Right edge: last item only.
+                    if(i==(oneKeywordItems.size()-1)) {
+                        content.moveTo(item.getRectangle().getRight()+padding, item.getRectangle().getBottom()-padding);
+                        content.lineTo(item.getRectangle().getRight()+padding, item.getRectangle().getTop()+padding);
+                    }
+
+                    content.stroke();
+                }
+            }
+            stamper.close();
+        } finally {
+            reader.close();
+        }
+    }
+}
+

--
Gitblit v1.9.3