playwright中文文档

playwright截图

playwright元素截图

from playwright.sync_api import sync_playwright
# Remote-debugging port of the Chrome instance this script attaches to.
port = 9222
# File the element screenshot is written to.
screenshot_path = 'screenshot.png'
with sync_playwright() as playwright:
    # Attach to the browser over CDP and reuse its first context and first page.
    browser = playwright.chromium.connect_over_cdp("http://127.0.0.1:" + str(port))
    chrome_context = browser.contexts[0]
    page = chrome_context.pages[0]
    # Wait for the element matching '#wrap-base' before capturing it.
    search_box = page.wait_for_selector('#wrap-base')
    # screenshot() returns the image bytes, not a path, so the return value is
    # ignored and the known output path is reported instead.
    search_box.screenshot(path=screenshot_path)
    print(f'截图已保存到 {screenshot_path}')

遍历子元素,并对每个子元素进行截图;

from playwright.sync_api import sync_playwright
import docx
doc = docx.Document()
# Remote-debugging port of the Chrome instance this script attaches to.
port = 9222
with sync_playwright() as playwright:
    # Attach to the browser over CDP and reuse its first context and first page.
    chrome_context = playwright.chromium.connect_over_cdp("http://127.0.0.1:" + str(port)).contexts[0]
    page = chrome_context.pages[0]
    # Locate the parent element whose <div> descendants will be captured.
    parent_selector = '#content1'
    parent_element = page.wait_for_selector(parent_selector)
    print(parent_element, type(parent_element))
    # query_selector_all('div') matches every <div> inside the parent.
    child_elements = parent_element.query_selector_all('div')
    for i, child_element in enumerate(child_elements):
        print(i)
        try:
            # Each element is saved as '<index>.png' in the working directory.
            child_element.screenshot(path=f'{i}.png')
        except Exception as exc:
            # Best effort: report the element that could not be captured and
            # continue with the next one instead of hiding the failure.
            print(f'第 {i} 个子元素截图失败: {exc}')

# Insert the screenshot of the element with index 1 and write the document.
doc.add_picture('1.png')
doc.save('截图.docx')

用playwright截图的方法,和将图片保存到文档里面的方法;保存文档后,图片自适应调整宽度;


def img_screenshot(page, path_selector, save_img_path='screenshot.png'):
    """Take a screenshot of the element matched by a selector.

    :param page: object providing ``wait_for_selector`` (a Playwright page
        in this file's examples)
    :param path_selector: selector of the element to capture
    :param save_img_path: file the image is written to; defaults to
        ``'screenshot.png'``, the path that used to be hard-coded
    :return: the path the screenshot was saved to
    """
    print('---------进入截图模块---------')
    # Wait for the element matching the selector before capturing it.
    target = page.wait_for_selector(path_selector)
    # screenshot() returns the image bytes, not a path, so the return value is
    # ignored and the known output path is logged instead.
    target.screenshot(path=save_img_path)
    print(f'截图已保存到 {save_img_path}')

    return save_img_path

def imgToDocx(doc,text_content,imgName):
    """Append a paragraph and a picture to a document, scaling the picture to the page.

    The picture is resized, keeping its aspect ratio, so that it fills either
    the usable page width or the usable page height of the last section,
    whichever is reached first.

    :param doc: document object the content is appended to
    :param text_content: text added as a new paragraph before the picture
    :param imgName: path of the image file to append
    :return: None
    """
    doc.add_paragraph(text_content)
    doc.add_picture(imgName)

    # The picture just added is the last inline shape of the document.
    picture = doc.inline_shapes[-1]

    # Usable area = page size minus the margins of the last section.
    layout = doc.sections[-1]
    usable_width = layout.page_width - layout.left_margin - layout.right_margin
    usable_height = layout.page_height - layout.top_margin - layout.bottom_margin

    ratio = picture.width / picture.height
    if ratio > usable_width / usable_height:
        # Relatively wider than the page: width is the limiting dimension.
        picture.width = int(usable_width)
        picture.height = int(usable_width / ratio)
        return

    # Relatively taller than (or same shape as) the page: height is the limit.
    picture.height = int(usable_height)
    picture.width = int(usable_height * ratio)

调用截图和保存文档的方法

from playwright.sync_api import sync_playwright
import docx
from public import img_screenshot, imgToDocx
doc = docx.Document()
port = 9222
# Selector of the element to capture and the caption written above the picture.
path_selector = 'body > div.container > div.container1 > div.result > div.page > div.nameBox.clearfix'
text_content = '保存图片。。。。。。。'
with sync_playwright() as playwright:
    # Attach to the browser over CDP and take its first context and first page.
    browser = playwright.chromium.connect_over_cdp("http://127.0.0.1:" + str(port))
    chrome_context = browser.contexts[0]
    page = chrome_context.pages[0]
    print(page)
    print('开始截图')
    imgName = img_screenshot(page, path_selector)
    print('截图完成')
    # Append the caption and the captured image to the document.
    imgToDocx(doc, text_content, imgName)
doc.save('截图保存.docx')

屏幕截图

from playwright.sync_api import Playwright, sync_playwright
import os

# Build the output path with os.path.join so the separator between the
# directory and the file name is correct on every operating system, not only
# on Windows.
screenshot_path = os.path.join('截图', 'baidu.png')
with sync_playwright() as p:
    browser = p.chromium.launch()
    page = browser.new_page()
    page.goto('https://www.baidu.com')
    # Capture the page and write it to the path built above.
    page.screenshot(path=screenshot_path)
    browser.close()

页面刷新

在 Playwright 中刷新页面可以使用 page.reload() 方法。这个方法会重新加载当前页,类似于手动点击浏览器的刷新按钮。
以下是使用 Playwright 实现页面刷新的示例代码:

from playwright.sync_api import Playwright, sync_playwright
with sync_playwright() as p:
    chromium = p.chromium
    browser = chromium.launch()
    page = browser.new_page()
    target_url = 'https://www.example.com'
    page.goto(target_url)
    # Wait for the page to finish loading.
    page.wait_for_load_state()
    # Reload the current page.
    page.reload()
    # Wait again for the reloaded page to finish loading.
    page.wait_for_load_state()
    browser.close()

在上面的代码中,我们首先使用 p.chromium.launch() 方法启动 Chromium 浏览器,然后使用 browser.new_page() 方法创建一个新页面,并使用 page.goto() 方法打开 https://www.example.com 网址。接着使用 page.wait_for_load_state() 等待页面加载完成,再调用 page.reload() 方法刷新页面。之后再次使用 page.wait_for_load_state() 等待页面加载完成,最后使用 browser.close() 方法关闭浏览器。

滚动页面

import time
from playwright.sync_api import Playwright, sync_playwright
def main(playwright: Playwright) -> None:
    """Launch Chromium, open example.com and scroll until the page stops moving.

    The page is scrolled to the bottom repeatedly; the loop ends once the
    vertical scroll offset is the same after two consecutive scrolls.

    :param playwright: Playwright instance used to launch the browser
    :return: None
    """
    browser = playwright.chromium.launch()
    try:
        page = browser.new_page()
        page.goto('https://www.example.com')

        # Scroll to the bottom until the scroll offset no longer changes.
        last_position = None
        while True:
            page.evaluate('window.scrollTo(0, document.body.scrollHeight)')
            # Pause for one second between scrolls. wait_for_timeout is used
            # instead of time.sleep, as the Playwright docs advise for the
            # sync API.
            page.wait_for_timeout(1000)
            current_position = page.evaluate('window.pageYOffset')
            if current_position == last_position:
                break
            last_position = current_position
    finally:
        # Close the browser even when navigation or scrolling raises.
        browser.close()


# Run the example with a managed Playwright instance.
with sync_playwright() as playwright:
    main(playwright)

  目录