|
@@ -1,6 +1,7 @@
|
|
|
import io
|
|
|
import os.path
|
|
|
import time
|
|
|
+from datetime import datetime
|
|
|
|
|
|
import scrapy
|
|
|
from PIL import Image
|
|
@@ -13,6 +14,10 @@ from selenium.webdriver.support.wait import WebDriverWait
|
|
|
from nmc.compress_image import capture_element_screenshot
|
|
|
|
|
|
|
|
|
+def js_is_complete(driver):  # Wait predicate: takes a WebDriver and is passed to WebDriverWait(...).until() further down in this patch.
|
|
|
+ return driver.execute_script("return document.readyState") == "complete"  # True only when the page's document.readyState is exactly "complete".
|
|
|
+
|
|
|
+
|
|
|
class NmcspiderSpider(scrapy.Spider):
|
|
|
name = "nmcspider"
|
|
|
allowed_domains = ["www.nmc.cn"]
|
|
@@ -64,7 +69,8 @@ class NmcspiderSpider(scrapy.Spider):
|
|
|
self.driver.get(response.url)
|
|
|
self.driver.set_window_size(1920, 1080)
|
|
|
# 创建文件目录
|
|
|
- path_to_save = os.path.join(province, city)
|
|
|
+ today_str = datetime.now().strftime("%Y-%m-%d")
|
|
|
+ path_to_save = os.path.join(province, city, today_str)
|
|
|
full_path = os.path.join(".", path_to_save)
|
|
|
if not os.path.exists(full_path):
|
|
|
os.makedirs(full_path)
|
|
@@ -86,26 +92,43 @@ class NmcspiderSpider(scrapy.Spider):
|
|
|
# cropped_image.save(image_path)
|
|
|
# 获取第二张图片
|
|
|
highcharts = WebDriverWait(self.driver, 20).until(
|
|
|
- EC.presence_of_element_located((By.ID, 'footer')))
|
|
|
+ EC.presence_of_element_located((By.ID, 'realChart')))
|
|
|
self.driver.execute_script("arguments[0].scrollIntoView({block: 'center', behavior: 'instant'});",
|
|
|
highcharts)
|
|
|
- # self.driver.execute_script(
|
|
|
- # """var elem = document.getElementById('tempchart');
|
|
|
- # elem.scrollIntoView({behavior: "instant", block: "center"});""")
|
|
|
-
|
|
|
- time.sleep(5)
|
|
|
- location = highcharts.location
|
|
|
- size = highcharts.size
|
|
|
- print(location, size, "================")
|
|
|
+ elem = WebDriverWait(self.driver, 20).until(
|
|
|
+ EC.presence_of_element_located((By.ID, 'realChart')))
|
|
|
+ time.sleep(2)
|
|
|
+ # 等待js全部加载完成
|
|
|
+ js_is_complete(self.driver)
|
|
|
+ WebDriverWait(self.driver, 20).until(js_is_complete)
|
|
|
+ # 获取屏幕截图
|
|
|
+ # self.driver.save_screenshot('screenshot.png')
|
|
|
+ screenshot = self.driver.get_screenshot_as_png()
|
|
|
+ script = """
|
|
|
+ var element = document.getElementById('realChart');
|
|
|
+ var rect = element.getBoundingClientRect();
|
|
|
+ return {left: rect.left, top: rect.top, width: rect.width, height: rect.height};
|
|
|
+ """
|
|
|
+ position_info = self.driver.execute_script(script)
|
|
|
+ print(position_info)
|
|
|
# 获取天气预报图
|
|
|
# 使用PIL和numpy处理图像
|
|
|
- screenshot = self.driver.get_screenshot_as_png()
|
|
|
image = Image.open(io.BytesIO(screenshot))
|
|
|
+ # location = elem.location
|
|
|
+ # size = elem.size
|
|
|
+ # print(f"Element Location: {location}, Size: {size}")
|
|
|
+ #
|
|
|
+ # crop_area = (
|
|
|
+ # location['x'], location['y'], location['x'] + size['width'],
|
|
|
+ # location['y'] + size['height'])
|
|
|
crop_area = (
|
|
|
- location['x'], location['y'], location['x'] + size['width'], location['y'] + size['height'])
|
|
|
+ position_info['left'], position_info['top'],
|
|
|
+ position_info['left'] + position_info['width'],
|
|
|
+ position_info['top'] + position_info['height'])
|
|
|
cropped_image = image.crop(crop_area)
|
|
|
- # 保存裁剪后的图片
|
|
|
+ # # 保存裁剪后的图片
|
|
|
image_path = os.path.join(full_path, city + '天气预报图.png')
|
|
|
+ # image.save(image_path)
|
|
|
cropped_image.save(image_path)
|
|
|
|
|
|
finally:
|