hace 4 meses · 099b787fcb
--- a/app.py
+++ b/app.py
@@ -9,7 +9,7 @@ from flask import Flask, request, jsonify, send_file
 
				 from add_signature import find_signature_positions, add_signature_to_pdf
			
 
				 from add_watermark import add_watermark_to_pdf
			
 
				 from extract_table import extract_temp_time, extract_pdf_table_to_excel, extract_temp_by_datetime_pattern, allowed_file, \
			
 
				-    safe_filename
			
 
				+    safe_filename, extract_temperature_data_from_pdf
			
 
				 from lib import Qiniu
			
 
				 from werkzeug.utils import secure_filename
			
 
				 from flask_cors import CORS
			
@@ -130,6 +130,8 @@ def extract_table():
 
				                     df = extract_temp_time(filepath)
			
 
				                 if "设备汇总报告" in text:
			
 
				                     df = extract_temp_by_datetime_pattern(filepath)
			
 
				+                if "详细数据" in text:
			
 
				+                    df = extract_temperature_data_from_pdf(filepath)
			
 
				 
			
 
				         if df is None:
			
 
				             os.remove(filepath)
			
--- a/extract_table.py
+++ b/extract_table.py
@@ -131,3 +131,30 @@ def extract_temp_by_datetime_pattern(pdf_path):
 
				     df = pd.DataFrame(all_data, columns=['时间', '温度'])
			
 
				     df = df.sort_values('时间').reset_index(drop=True)
			
 
				     return df
			
 
				+
			
 
				+def extract_temperature_data_from_pdf(pdf_path):
			
 
				+    """
			
 
				+    从PDF文件中提取时间和温度数据
			
 
				+    """
			
 
				+    all_data = []
			
 
				+
			
 
				+    with pdfplumber.open(pdf_path) as pdf:
			
 
				+        for page in pdf.pages:
			
 
				+            text = page.extract_text()
			
 
				+
			
 
				+            # 使用正则表达式匹配数据行
			
 
				+            # 匹配模式: 序号 | 日期时间 | 温度 | 状态
			
 
				+            for value in text.split("\n"):
			
 
				+                pattern = r'(\d+)\s+(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})\s+(-?\d+\.\d+|-?\d+)\s+([^\s]+)'
			
 
				+                matches = re.findall(pattern, value)
			
 
				+                for match in matches:
			
 
				+                    index, datetime_str, temperature, status = match
			
 
				+                    all_data.append({
			
 
				+                        '时间': datetime_str,
			
 
				+                        '温度': temperature,
			
 
				+                    })
			
 
				+    # 转换为DataFrame
			
 
				+    df = pd.DataFrame(all_data, columns=['时间', '温度'])
			
 
				+    # 按时间排序
			
 
				+    df = df.sort_values('时间').reset_index(drop=True)
			
 
				+    return df