使用pyppeteer进行页面抓取

这个模块很不错,这里先记录一下使用代码,有时间再详细整理。

示例代码如下:
import asyncio
import time
import ssl
from pyppeteer import launch

# Replace the ssl module's private default-HTTPS-context factory with the
# unverified one, so contexts built through it skip certificate verification.
# NOTE(review): presumably a workaround for certificate errors during
# pyppeteer's Chromium download -- confirm; this is process-wide and insecure
# outside local testing.
ssl._create_default_https_context = ssl._create_unverified_context


async def main():
    """Log in to the local web app and save screenshots of three pages.

    Launches headless Chromium through pyppeteer, signs in on
    ``http://127.0.0.1:8000/`` with the hard-coded credentials, then writes
    ``dashboard.png``, ``healthy.png`` and ``online_audit.png`` to the
    current working directory. Finally prints a dict evaluated in the page.

    The browser is always closed, even when a step raises.
    """
    browser = await launch(headless=True)
    try:
        page = await browser.newPage()
        await page.goto('http://127.0.0.1:8000/')

        await page.click('#login_user')
        await page.type('#login_user', 'admin')

        await page.click('#password')
        await page.type('#password', '123456')

        # Register the navigation wait together with the click; waiting only
        # after the click can miss a navigation that has already finished.
        await asyncio.gather(
            page.waitForNavigation(),
            page.click('#login-submit'),
        )

        await page.setViewport({
            'width': 1350,
            'height': 850,
        })

        await page.screenshot({'path': 'dashboard.png'})

        await page.goto('http://127.0.0.1:8000/sql_healthy')

        # Pause before the screenshot (presumably to let the page finish
        # rendering). asyncio.sleep yields to the event loop, whereas
        # time.sleep would block it and stall the browser connection.
        await asyncio.sleep(3)

        await page.screenshot({'path': 'healthy.png'})

        await page.setViewport({
            'width': 1350,
            'height': 1100,
        })

        await page.goto('http://127.0.0.1:8000/online_audit/overview')
        await asyncio.sleep(3)
        await page.screenshot({'path': 'online_audit.png'})

        # width/height are constants in the evaluated script; only
        # deviceScaleFactor is read from the page.
        dimensions = await page.evaluate('''() => {
            return {
                width: 1600,
                height: 1200,
                deviceScaleFactor: window.devicePixelRatio,
            }
        }''')

        print(dimensions)
        # e.g. {'width': 1600, 'height': 1200, 'deviceScaleFactor': 1}
    finally:
        # Always shut Chromium down so a failed step does not leak a process.
        await browser.close()


# Script entry point: drive main() to completion on the event loop.
# NOTE(review): calling asyncio.get_event_loop() with no running loop is
# deprecated on newer Python versions; asyncio.run(main()) is the likely
# replacement -- confirm against the target interpreter.
asyncio.get_event_loop().run_until_complete(main())
知识就是财富
如果您觉得文章对您有帮助, 欢迎请我喝杯水!