|
|
@@ -131,3 +131,30 @@ def extract_temp_by_datetime_pattern(pdf_path):
|
|
|
df = pd.DataFrame(all_data, columns=['时间', '温度'])
|
|
|
df = df.sort_values('时间').reset_index(drop=True)
|
|
|
return df
|
|
|
+
|
|
|
def extract_temperature_data_from_pdf(pdf_path):
    """
    Extract timestamp and temperature readings from a PDF file.

    Each page's text is scanned line by line for rows shaped like
    ``<index> <YYYY-MM-DD HH:MM:SS> <temperature> <status>``. Only the
    timestamp and the temperature of each matching row are kept.

    Args:
        pdf_path: Location of the PDF; passed unchanged to
            ``pdfplumber.open``.

    Returns:
        A ``pandas.DataFrame`` with the columns ``'时间'`` (timestamp) and
        ``'温度'`` (temperature), sorted ascending by ``'时间'`` with a
        fresh 0-based index. Both columns hold the strings exactly as
        captured from the page text; no datetime or numeric conversion
        is performed here. The frame is empty when no row matches.
    """
    # Row layout: index | date-time | temperature | status.
    # Compiled once up front because it is reused for every line of every page.
    row_pattern = re.compile(
        r'(\d+)\s+(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})\s+(-?\d+\.\d+|-?\d+)\s+([^\s]+)'
    )

    all_data = []

    with pdfplumber.open(pdf_path) as pdf:
        for page in pdf.pages:
            # A page without a text layer (e.g. a scanned image) may yield
            # None instead of a string; treat it as having no rows.
            text = page.extract_text() or ""

            # Match per line so that the `\s+` separators in the pattern can
            # never stitch fields from two different rows together.
            for line in text.split("\n"):
                for match in row_pattern.finditer(line):
                    # Groups 1 (index) and 4 (status) only anchor the row
                    # shape; they are not part of the result.
                    all_data.append({
                        '时间': match.group(2),
                        '温度': match.group(3),
                    })

    # Build the result table.
    df = pd.DataFrame(all_data, columns=['时间', '温度'])
    # The fixed-width "YYYY-MM-DD HH:MM:SS" format makes plain string ordering
    # equal to chronological ordering.
    df = df.sort_values('时间').reset_index(drop=True)
    return df
|