Browse Source

第一张图截取完成

huangyan 9 months ago
parent
commit
f67e4c0843

+ 6 - 0
.idea/vcs.xml

@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$" vcs="Git" />
+  </component>
+</project>

BIN
nmc/__pycache__/__init__.cpython-311.pyc


BIN
nmc/__pycache__/compress_image.cpython-311.pyc


BIN
nmc/__pycache__/middlewares.cpython-311.pyc


BIN
nmc/__pycache__/pipelines.cpython-311.pyc


BIN
nmc/__pycache__/settings.cpython-311.pyc


+ 7 - 2
nmc/compress_image.py

@@ -4,6 +4,7 @@ from PIL import Image
 from selenium.webdriver.common.action_chains import ActionChains
 from selenium.webdriver.common.keys import Keys
 
+
 def compress_image(image_path, output_path, quality=90, resize=None):
     """压缩图片并保存到指定路径。
 
@@ -17,9 +18,12 @@ def compress_image(image_path, output_path, quality=90, resize=None):
     img = Image.open(image_path)
     img.save(output_path, optimize=True, quality=quality)
 
+
 def scroll_to_element(driver, element):
     """Scroll the page so that the given element is brought into view.

     Runs ``scrollIntoView()`` on ``element`` through the driver's
     ``execute_script``; nothing is returned.
     """
     driver.execute_script("arguments[0].scrollIntoView();", element)
+
+
 def capture_element_screenshot(driver, element, filename):
     """Captures screenshot of the given element and saves it."""
     location = element.location
@@ -31,5 +35,6 @@ def capture_element_screenshot(driver, element, filename):
     top = location['y']
     right = location['x'] + size['width']
     bottom = location['y'] + size['height']
-    cropped_image = image.crop((left, top, right, bottom)) # 注意这里的顺序是(left, top, right, bottom),可能需要调整为(left, top, right, bottom)取决于坐标系
-    cropped_image.save(filename)
+    cropped_image = image.crop(
+        (left, top, right, bottom))  # Note: the crop box order is (left, top, right, bottom); the values themselves may need adjusting depending on the coordinate system
+    cropped_image.save(filename)

+ 1 - 1
nmc/settings.py

@@ -15,7 +15,7 @@ NEWSPIDER_MODULE = "nmc.spiders"
 # Crawl responsibly by identifying yourself (and your website) on the user-agent
 USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0"
 REACTOR = 'twisted.internet.selectreactor.SelectReactor'
-LOG_LEVEL = 'DEBUG'
+LOG_LEVEL = 'ERROR'
 # Obey robots.txt rules
 ROBOTSTXT_OBEY = False
 # Configure maximum concurrent requests performed by Scrapy (default: 16)

BIN
nmc/spiders/__pycache__/__init__.cpython-311.pyc


BIN
nmc/spiders/__pycache__/nmcspider.cpython-311.pyc


+ 36 - 11
nmc/spiders/nmcspider.py

@@ -1,5 +1,6 @@
 import io
 import os.path
+import time
 
 import scrapy
 from PIL import Image
@@ -9,6 +10,8 @@ from selenium.webdriver.common.by import By
 from selenium.webdriver.support import expected_conditions as EC
 from selenium.webdriver.support.wait import WebDriverWait
 
+from nmc.compress_image import capture_element_screenshot
+
 
 class NmcspiderSpider(scrapy.Spider):
     name = "nmcspider"
@@ -24,8 +27,9 @@ class NmcspiderSpider(scrapy.Spider):
         # 添加路径到chrome_options
         chrome_options.add_argument('--webdriver-executable-path=' + chrome_path)
         # 如果需要添加其他选项,例如禁用浏览器窗口,可以这样:
-        # chrome_options.add_argument('--headless')
-        # chrome_options.add_argument('--disable-gpu')
+        chrome_options.add_argument('--headless')
+        chrome_options.add_argument('--disable-gpu')
+        chrome_options.add_argument('--no-sandbox')
         self.driver = webdriver.Chrome(options=chrome_options)
 
     def closed(self, reason):
@@ -59,25 +63,46 @@ class NmcspiderSpider(scrapy.Spider):
         city = response.meta['city']
         self.driver.get(response.url)
         self.driver.set_window_size(1920, 1080)
-        title = response.xpath('//*[@id="realChart"]/div[1]/span[1]/text()').get()
         # 创建文件目录
         path_to_save = os.path.join(province, city)
         full_path = os.path.join(".", path_to_save)
         if not os.path.exists(full_path):
             os.makedirs(full_path)
         try:
-            elemen = self.driver.find_element(By.ID, 'forecastChart')
-            self.driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", elemen)
-            element = WebDriverWait(self.driver, 20).until(
-                EC.presence_of_element_located((By.ID, 'forecastChart')))
-            location = elemen.location
-            size = elemen.size
+            # 获取第一张图片-天气预报
+            # element = WebDriverWait(self.driver, 20).until(
+            #     EC.presence_of_element_located((By.XPATH, '/html/body/div[2]/div')))
+            # self.driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", element)
+            # location = element.location
+            # size = element.size
+            # # 获取天气预报图
+            # # 使用PIL和numpy处理图像
+            # screenshot = self.driver.get_screenshot_as_png()
+            # image = Image.open(io.BytesIO(screenshot))
+            # crop_area = (location['x'], location['y'], location['x'] + size['width'], location['y'] + size['height'])
+            # cropped_image = image.crop(crop_area)
+            # # 保存裁剪后的图片
+            # image_path = os.path.join(full_path, city + '天气预报图.png')
+            # cropped_image.save(image_path)
+            # 获取第二张图片
+            highcharts = WebDriverWait(self.driver, 20).until(
+                EC.presence_of_element_located((By.ID, 'footer')))
+            self.driver.execute_script("arguments[0].scrollIntoView({block: 'center', behavior: 'instant'});",
+                                       highcharts)
+            # self.driver.execute_script(
+            #     """var elem = document.getElementById('tempchart');
+            #     elem.scrollIntoView({behavior: "instant", block: "center"});""")
+
+            time.sleep(5)
+            location = highcharts.location
+            size = highcharts.size
+            print(location, size, "================")
             # 获取天气预报图
             # 使用PIL和numpy处理图像
             screenshot = self.driver.get_screenshot_as_png()
             image = Image.open(io.BytesIO(screenshot))
-            crop_area = (location['x'], location['y'], location['x'] + size['width'], location['y'] + size['height'])
-            print(crop_area, "=========================")
+            crop_area = (
+                location['x'], location['y'], location['x'] + size['width'], location['y'] + size['height'])
             cropped_image = image.crop(crop_area)
             # 保存裁剪后的图片
             image_path = os.path.join(full_path, city + '天气预报图.png')