113 lines
5.2 KiB
Python
113 lines
5.2 KiB
Python
import os

import mysql.connector
from mysql.connector import errorcode
import requests
from bs4 import BeautifulSoup
|
|
|
|
|
|
# MySQL connection settings, unpacked as keyword arguments into
# ``mysql.connector.connect``.
#
# Each value can be overridden through a ``ZSJIE_DB_*`` environment variable.
# The password intentionally has no built-in default so the credential is not
# stored in source control: export ``ZSJIE_DB_PASSWORD`` before running.
DB_CONFIG = {
    'user': os.environ.get('ZSJIE_DB_USER', 'zsjie'),  # database user name
    'password': os.environ.get('ZSJIE_DB_PASSWORD', ''),  # secret, environment only
    # Database host name (for example '127.0.0.1' or 'localhost').
    'host': os.environ.get('ZSJIE_DB_HOST', '111.180.203.166'),
    'database': os.environ.get('ZSJIE_DB_NAME', 'zsjie'),  # database to connect to
    'port': int(os.environ.get('ZSJIE_DB_PORT', '25506')),
    'raise_on_warnings': True,  # raise an exception on warnings
}
|
|
|
|
def connect_to_mysql():
    """Open a MySQL connection using ``DB_CONFIG``.

    Returns:
        A ``(connection, cursor)`` pair on success. When ``mysql.connector``
        raises an error, a diagnostic message is printed and
        ``(None, None)`` is returned instead of propagating the exception.
    """
    try:
        connection = mysql.connector.connect(**DB_CONFIG)
        db_cursor = connection.cursor()
        print("成功连接到 MySQL 数据库")
        return connection, db_cursor
    except mysql.connector.Error as exc:
        # Two well-known failure codes get dedicated messages; anything else
        # falls back to the raw error text.
        if exc.errno == errorcode.ER_ACCESS_DENIED_ERROR:
            message = "访问被拒绝:用户名或密码错误"
        elif exc.errno == errorcode.ER_BAD_DB_ERROR:
            message = f"数据库 '{DB_CONFIG['database']}' 不存在"
        else:
            message = f"连接 MySQL 时发生错误: {exc}"
        print(message)
    return None, None
|
|
|
|
def getHttpContext(url, post_id='61007', cookie=None, timeout=15):
    """Request a post's download panel and return the ``goto`` links in it.

    Two form-encoded POST requests are sent to ``url``: first the
    ``add_post_views_num`` action, then ``get_async_shop_down``. Both response
    bodies are printed, and the second one is parsed as HTML to collect the
    download links.

    Args:
        url: Endpoint that receives both POST requests.
        post_id: Identifier of the post to query; also used to build the
            ``Referer`` header.
        cookie: Raw ``Cookie`` header value of a logged-in session. When
            omitted, the ``ZSJIE_COOKIE`` environment variable is used; if
            that is unset as well, no ``Cookie`` header is sent. Session
            cookies expire, so they are not stored in the source.
        timeout: Seconds to wait for each request.

    Returns:
        A list of ``href`` values starting with
        ``https://www.zsjie.com/goto?``, in document order. The list is empty
        when the response contains no matching link.

    Raises:
        requests.RequestException: On connection, TLS or timeout failures.
    """
    if cookie is None:
        cookie = os.environ.get('ZSJIE_COOKIE')

    # Only real request headers are sent. The HTTP/2 pseudo-headers
    # (authority/method/path/scheme) copied from browser dev tools are not
    # regular headers, and Accept-Encoding is left to requests so that it
    # only advertises encodings it is able to decode.
    headers = {
        'Accept': 'text/html,*/*;q=0.01',
        'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'Origin': 'https://www.zsjie.com',
        'Priority': 'u=1, i',  # may not be required in every case
        'Referer': f'https://www.zsjie.com/{post_id}.html',  # page the request originates from
        'Sec-Ch-Ua': '"Chromium";v="136", "Microsoft Edge";v="136", "Not.A/Brand";v="99"',
        'Sec-Ch-Ua-Mobile': '?0',  # ?0 means a non-mobile device
        'Sec-Ch-Ua-Platform': '"Windows"',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-origin',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36 Edg/136.0.0.0',
        'X-Requested-With': 'XMLHttpRequest',  # marks the call as an AJAX request
    }
    if cookie:
        headers['Cookie'] = cookie

    # The payload goes in the request body (data=), matching the declared
    # form Content-Type. TLS certificate verification stays at its default
    # (enabled) because a session cookie travels with the request.
    views_payload = {'action': 'add_post_views_num', 'id': post_id}
    views_response = requests.post(
        url, headers=headers, data=views_payload, timeout=timeout,
    )
    print(views_response.text)

    download_payload = {'action': 'get_async_shop_down', 'post_id': post_id}
    response = requests.post(
        url, headers=headers, data=download_payload, timeout=timeout,
    )
    print(response.text)

    soup = BeautifulSoup(response.text, 'html.parser')
    target_link_prefix = "https://www.zsjie.com/goto?"
    extracted_links = []
    for div in soup.find_all('div', class_='btn-group btn-block mt-2'):
        # href=True restricts the search to <a> tags that carry an href.
        for a_tag in div.find_all('a', href=True):
            href = a_tag['href']
            if href.startswith(target_link_prefix):
                extracted_links.append(href)
                print(f"提取到链接: {href}")
    return extracted_links
|
|
|
|
|
|
|
|
def spide():
    """Fetch pending ``resource`` rows and run ``getHttpContext`` on each.

    Selects up to 50 rows, highest ``id`` first, whose ``resource_url`` equals
    a single space and whose ``available_flag`` is not ``'0'``. Each row is
    printed and its second column is passed to ``getHttpContext``.

    Returns immediately when no database connection could be established;
    ``connect_to_mysql`` has already printed the reason in that case. The
    cursor and the connection are always closed afterwards, cursor first.
    """
    cnx, cursor = connect_to_mysql()
    if cnx is None or cursor is None:
        # connect_to_mysql() signals failure with (None, None); there is
        # nothing to query and nothing to close.
        return

    try:
        cursor.execute("select * from resource where resource_url =' ' and available_flag !='0' order by id desc limit 0,50 ")
        for item in cursor.fetchall():
            print(item)
            # NOTE(review): assumes column 1 of `resource` holds the URL to
            # request -- confirm against the table schema.
            getHttpContext(item[1])
    finally:
        # Close the cursor before the connection it belongs to, and make sure
        # the connection is released even if closing the cursor fails.
        try:
            cursor.close()
            print("cursor 已关闭!")
        finally:
            cnx.close()
            print("cnx 已关闭!")
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: issue a single request against the site's AJAX
    # endpoint. The database-driven crawl (spide) is not run from here.
    ajax_endpoint = 'https://www.zsjie.com/wp-admin/admin-ajax.php'
    getHttpContext(ajax_endpoint)
|