Browse Source

第二张图片

huangyan 9 months ago
parent
commit
9a8b96ab1c
3 changed files with 36 additions and 13 deletions
  1. BIN
      nmc/spiders/__pycache__/nmcspider.cpython-311.pyc
  2. 36 13
      nmc/spiders/nmcspider.py
  3. BIN
      screenshot.png

BIN
nmc/spiders/__pycache__/nmcspider.cpython-311.pyc


+ 36 - 13
nmc/spiders/nmcspider.py

@@ -1,6 +1,7 @@
 import io
 import os.path
 import time
+from datetime import datetime
 
 import scrapy
 from PIL import Image
@@ -13,6 +14,10 @@ from selenium.webdriver.support.wait import WebDriverWait
 from nmc.compress_image import capture_element_screenshot
 
 
+def js_is_complete(driver):  # Predicate: True once the page reports document.readyState == "complete".
+    return driver.execute_script("return document.readyState") == "complete"  # Takes only the driver, so it can be handed to WebDriverWait(...).until(); NOTE(review): presumably readyState does not cover chart scripts that render after load - confirm.
+
+
 class NmcspiderSpider(scrapy.Spider):
     name = "nmcspider"
     allowed_domains = ["www.nmc.cn"]
@@ -64,7 +69,8 @@ class NmcspiderSpider(scrapy.Spider):
         self.driver.get(response.url)
         self.driver.set_window_size(1920, 1080)
         # 创建文件目录
-        path_to_save = os.path.join(province, city)
+        today_str = datetime.now().strftime("%Y-%m-%d")
+        path_to_save = os.path.join(province, city, today_str)
         full_path = os.path.join(".", path_to_save)
         if not os.path.exists(full_path):
             os.makedirs(full_path)
@@ -86,26 +92,43 @@ class NmcspiderSpider(scrapy.Spider):
             # cropped_image.save(image_path)
             # 获取第二张图片
             highcharts = WebDriverWait(self.driver, 20).until(
-                EC.presence_of_element_located((By.ID, 'footer')))
+                EC.presence_of_element_located((By.ID, 'realChart')))
             self.driver.execute_script("arguments[0].scrollIntoView({block: 'center', behavior: 'instant'});",
                                        highcharts)
-            # self.driver.execute_script(
-            #     """var elem = document.getElementById('tempchart');
-            #     elem.scrollIntoView({behavior: "instant", block: "center"});""")
-
-            time.sleep(5)
-            location = highcharts.location
-            size = highcharts.size
-            print(location, size, "================")
+            elem = WebDriverWait(self.driver, 20).until(
+                EC.presence_of_element_located((By.ID, 'realChart')))
+            time.sleep(2)
+            # 等待js全部加载完成
+            js_is_complete(self.driver)
+            WebDriverWait(self.driver, 20).until(js_is_complete)
+            # 获取屏幕截图
+            # self.driver.save_screenshot('screenshot.png')
+            screenshot = self.driver.get_screenshot_as_png()
+            script = """
+            var element = document.getElementById('realChart');
+            var rect = element.getBoundingClientRect();
+            return {left: rect.left, top: rect.top, width: rect.width, height: rect.height};
+            """
+            position_info = self.driver.execute_script(script)
+            print(position_info)
             # 获取天气预报图
             # 使用PIL和numpy处理图像
-            screenshot = self.driver.get_screenshot_as_png()
             image = Image.open(io.BytesIO(screenshot))
+            # location = elem.location
+            # size = elem.size
+            # print(f"Element Location: {location}, Size: {size}")
+            #
+            # crop_area = (
+            #     location['x'], location['y'], location['x'] + size['width'],
+            #     location['y'] + size['height'])
             crop_area = (
-                location['x'], location['y'], location['x'] + size['width'], location['y'] + size['height'])
+                position_info['left'], position_info['top'],
+                position_info['left'] + position_info['width'],
+                position_info['top'] + position_info['height'])
             cropped_image = image.crop(crop_area)
-            # 保存裁剪后的图片
+            # # 保存裁剪后的图片
             image_path = os.path.join(full_path, city + '天气预报图.png')
+            # image.save(image_path)
             cropped_image.save(image_path)
 
         finally:

BIN
screenshot.png