1 年之前 · f67e4c0843
--- a/.idea/vcs.xml
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
 
				+<?xml version="1.0" encoding="UTF-8"?>
			
 
				+<project version="4">
			
 
				+  <component name="VcsDirectoryMappings">
			
 
				+    <mapping directory="$PROJECT_DIR$" vcs="Git" />
			
 
				+  </component>
			
 
				+</project>
			
--- a/nmc/__pycache__/__init__.cpython-311.pyc
+++ b/nmc/__pycache__/__init__.cpython-311.pyc
--- a/nmc/__pycache__/compress_image.cpython-311.pyc
+++ b/nmc/__pycache__/compress_image.cpython-311.pyc
--- a/nmc/__pycache__/middlewares.cpython-311.pyc
+++ b/nmc/__pycache__/middlewares.cpython-311.pyc
--- a/nmc/__pycache__/pipelines.cpython-311.pyc
+++ b/nmc/__pycache__/pipelines.cpython-311.pyc
--- a/nmc/__pycache__/settings.cpython-311.pyc
+++ b/nmc/__pycache__/settings.cpython-311.pyc
--- a/nmc/compress_image.py
+++ b/nmc/compress_image.py
@@ -4,6 +4,7 @@ from PIL import Image
 
				 from selenium.webdriver.common.action_chains import ActionChains
			
 
				 from selenium.webdriver.common.keys import Keys
			
 
				 
			
 
				+
			
 
				 def compress_image(image_path, output_path, quality=90, resize=None):
			
 
				     """压缩图片并保存到指定路径。
			
 
				 
			
@@ -17,9 +18,12 @@ def compress_image(image_path, output_path, quality=90, resize=None):
 
				     img = Image.open(image_path)
			
 
				     img.save(output_path, optimize=True, quality=quality)
			
 
				 
			
 
				+
			
 
				 def scroll_to_element(driver, element):
			
 
				     """Scroll the page so that the given element is brought into view."""
			
 
				     driver.execute_script("arguments[0].scrollIntoView();", element)
			
 
				+
			
 
				+
			
 
				 def capture_element_screenshot(driver, element, filename):
			
 
				     """Captures screenshot of the given element and saves it."""
			
 
				     location = element.location
			
@@ -31,5 +35,6 @@ def capture_element_screenshot(driver, element, filename):
 
				     top = location['y']
			
 
				     right = location['x'] + size['width']
			
 
				     bottom = location['y'] + size['height']
			
 
				-    cropped_image = image.crop((left, top, right, bottom)) # 注意这里的顺序是(left, top, right, bottom)，可能需要调整为(left, top, right, bottom)取决于坐标系
			
 
				-    cropped_image.save(filename)
			
 
				+    cropped_image = image.crop(
			
 
				+        (left, top, right, bottom))  # crop box order is (left, top, right, bottom); the coordinate values may need adjusting depending on the coordinate system
			
 
				+    cropped_image.save(filename)
			
--- a/nmc/settings.py
+++ b/nmc/settings.py
@@ -15,7 +15,7 @@ NEWSPIDER_MODULE = "nmc.spiders"
 
				 # Crawl responsibly by identifying yourself (and your website) on the user-agent
			
 
				 USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0"
			
 
				 REACTOR = 'twisted.internet.selectreactor.SelectReactor'
			
 
				-LOG_LEVEL = 'DEBUG'
			
 
				+LOG_LEVEL = 'ERROR'
			
 
				 # Obey robots.txt rules
			
 
				 ROBOTSTXT_OBEY = False
			
 
				 # Configure maximum concurrent requests performed by Scrapy (default: 16)
			
--- a/nmc/spiders/__pycache__/__init__.cpython-311.pyc
+++ b/nmc/spiders/__pycache__/__init__.cpython-311.pyc
--- a/nmc/spiders/__pycache__/nmcspider.cpython-311.pyc
+++ b/nmc/spiders/__pycache__/nmcspider.cpython-311.pyc
--- a/nmc/spiders/nmcspider.py
+++ b/nmc/spiders/nmcspider.py
@@ -1,5 +1,6 @@
 
				 import io
			
 
				 import os.path
			
 
				+import time
			
 
				 
			
 
				 import scrapy
			
 
				 from PIL import Image
			
@@ -9,6 +10,8 @@ from selenium.webdriver.common.by import By
 
				 from selenium.webdriver.support import expected_conditions as EC
			
 
				 from selenium.webdriver.support.wait import WebDriverWait
			
 
				 
			
 
				+from nmc.compress_image import capture_element_screenshot
			
 
				+
			
 
				 
			
 
				 class NmcspiderSpider(scrapy.Spider):
			
 
				     name = "nmcspider"
			
@@ -24,8 +27,9 @@ class NmcspiderSpider(scrapy.Spider):
 
				         # 添加路径到chrome_options
			
 
				         chrome_options.add_argument('--webdriver-executable-path=' + chrome_path)
			
 
				         # 如果需要添加其他选项，例如禁用浏览器窗口，可以这样：
			
 
				-        # chrome_options.add_argument('--headless')
			
 
				-        # chrome_options.add_argument('--disable-gpu')
			
 
				+        chrome_options.add_argument('--headless')
			
 
				+        chrome_options.add_argument('--disable-gpu')
			
 
				+        chrome_options.add_argument('--no-sandbox')
			
 
				         self.driver = webdriver.Chrome(options=chrome_options)
			
 
				 
			
 
				     def closed(self, reason):
			
@@ -59,25 +63,46 @@ class NmcspiderSpider(scrapy.Spider):
 
				         city = response.meta['city']
			
 
				         self.driver.get(response.url)
			
 
				         self.driver.set_window_size(1920, 1080)
			
 
				-        title = response.xpath('//*[@id="realChart"]/div[1]/span[1]/text()').get()
			
 
				         # 创建文件目录
			
 
				         path_to_save = os.path.join(province, city)
			
 
				         full_path = os.path.join(".", path_to_save)
			
 
				         if not os.path.exists(full_path):
			
 
				             os.makedirs(full_path)
			
 
				         try:
			
 
				-            elemen = self.driver.find_element(By.ID, 'forecastChart')
			
 
				-            self.driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", elemen)
			
 
				-            element = WebDriverWait(self.driver, 20).until(
			
 
				-                EC.presence_of_element_located((By.ID, 'forecastChart')))
			
 
				-            location = elemen.location
			
 
				-            size = elemen.size
			
 
				+            # 获取第一张图片-天气预报
			
 
				+            # element = WebDriverWait(self.driver, 20).until(
			
 
				+            #     EC.presence_of_element_located((By.XPATH, '/html/body/div[2]/div')))
			
 
				+            # self.driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", element)
			
 
				+            # location = element.location
			
 
				+            # size = element.size
			
 
				+            # # 获取天气预报图
			
 
				+            # # 使用PIL和numpy处理图像
			
 
				+            # screenshot = self.driver.get_screenshot_as_png()
			
 
				+            # image = Image.open(io.BytesIO(screenshot))
			
 
				+            # crop_area = (location['x'], location['y'], location['x'] + size['width'], location['y'] + size['height'])
			
 
				+            # cropped_image = image.crop(crop_area)
			
 
				+            # # 保存裁剪后的图片
			
 
				+            # image_path = os.path.join(full_path, city + '天气预报图.png')
			
 
				+            # cropped_image.save(image_path)
			
 
				+            # 获取第二张图片
			
 
				+            highcharts = WebDriverWait(self.driver, 20).until(
			
 
				+                EC.presence_of_element_located((By.ID, 'footer')))
			
 
				+            self.driver.execute_script("arguments[0].scrollIntoView({block: 'center', behavior: 'instant'});",
			
 
				+                                       highcharts)
			
 
				+            # self.driver.execute_script(
			
 
				+            #     """var elem = document.getElementById('tempchart');
			
 
				+            #     elem.scrollIntoView({behavior: "instant", block: "center"});""")
			
 
				+
			
 
				+            time.sleep(5)
			
 
				+            location = highcharts.location
			
 
				+            size = highcharts.size
			
 
				+            print(location, size, "================")
			
 
				             # 获取天气预报图
			
 
				             # 使用PIL和numpy处理图像
			
 
				             screenshot = self.driver.get_screenshot_as_png()
			
 
				             image = Image.open(io.BytesIO(screenshot))
			
 
				-            crop_area = (location['x'], location['y'], location['x'] + size['width'], location['y'] + size['height'])
			
 
				-            print(crop_area, "=========================")
			
 
				+            crop_area = (
			
 
				+                location['x'], location['y'], location['x'] + size['width'], location['y'] + size['height'])
			
 
				             cropped_image = image.crop(crop_area)
			
 
				             # 保存裁剪后的图片
			
 
				             image_path = os.path.join(full_path, city + '天气预报图.png')