LiuYuan's Blog
This is LiuYuan's Blog
Toggle navigation
LiuYuan's Blog
Home
Docker相关
MySQL相关
Ansible相关
维护脚本相关
Windows系统脚本
Python相关
Nginx相关
About Me
Archives
Tags
微博评论爬取
2024-06-20 10:14:25
55
0
0
admin
"""Download the comments of a Weibo post into a CSV file.

Usage: run the script, enter the post id when prompted; the comments are
written to ``weibo_<mid>.csv`` in the current directory.
"""
import csv
from datetime import datetime

import requests
from fake_useragent import UserAgent

# Seconds to wait for the server before giving up; without a timeout
# ``requests`` can block forever on a stalled connection.
_REQUEST_TIMEOUT = 10


def convert_created_at(created_at):
    """Reformat a Weibo timestamp as ``YYYY-MM-DD HH:MM:SS``.

    Args:
        created_at: Timestamp such as ``'Thu Jun 20 10:14:25 +0800 2024'``.

    Returns:
        The same wall-clock time formatted as ``'2024-06-20 10:14:25'``.

    Raises:
        ValueError: If ``created_at`` does not match the expected layout.
    """
    # %z accepts any UTC offset instead of only the literal "+0800".
    # NOTE: %a / %b are matched using the current locale's day/month names.
    parsed = datetime.strptime(created_at, '%a %b %d %H:%M:%S %z %Y')
    return parsed.strftime('%Y-%m-%d %H:%M:%S')


def get_weibo_mid(weibo_id):
    """Look up the ``mid`` field of a Weibo post.

    Args:
        weibo_id: The post id entered by the user.

    Returns:
        The value of the ``mid`` field from the JSON response, or ``None``
        when the request fails, the response is not valid JSON, or the
        field is missing/empty. A message is printed in each failure case.
    """
    url = 'https://weibo.com/ajax/statuses/show'
    header = {'user-agent': UserAgent().random}

    try:
        # ``params`` lets requests URL-encode the user-supplied id.
        response = requests.get(
            url=url,
            params={'id': weibo_id},
            headers=header,
            timeout=_REQUEST_TIMEOUT,
        )
    except requests.RequestException as e:
        print(f"请求微博 MID 时发生异常:{e}")
        return None

    if response.status_code != 200:
        print(f"无法获取微博 MID。状态码:{response.status_code}")
        return None

    try:
        weibo_mid = response.json().get('mid')
    except (ValueError, AttributeError) as e:
        # ValueError: body is not JSON; AttributeError: JSON is not an object.
        print(f"解析微博 MID 时发生异常:{e}")
        return None

    if not weibo_mid:
        print("未找到微博 MID。")
        return None
    return weibo_mid


def get_weibo_comments(weibo_id, csv_filename):
    """Fetch every comment page of a post and write the rows to a CSV file.

    Each comment is printed and written as ``User, Comment, Created_at``.
    Paging follows the ``max_id`` cursor returned by each response and stops
    when the cursor is ``0``, stops advancing, or a request fails.

    Args:
        weibo_id: Identifier passed as the ``id`` query parameter.
        csv_filename: Path of the CSV file to create (overwritten if present).
    """
    url = 'https://weibo.com/ajax/statuses/buildComments'
    header = {'user-agent': UserAgent().random}

    # utf-8-sig writes a BOM so spreadsheet tools detect the encoding.
    with open(csv_filename, mode='w', encoding='utf-8-sig', newline='') as file:
        csv_writer = csv.writer(file)
        csv_writer.writerow(['User', 'Comment', 'Created_at'])

        max_id = ''  # An empty cursor requests the first page.
        while max_id != 0:
            params = {
                'flow': 1,
                'is_reload': 1,
                'id': weibo_id,
                'is_show_bulletin': 2,
                'is_mix': 0,
                'max_id': max_id,
                'count': 10,
            }
            try:
                resp = requests.get(
                    url=url,
                    params=params,
                    headers=header,
                    timeout=_REQUEST_TIMEOUT,
                )
                resp.raise_for_status()
                json_data = resp.json()
            except (requests.RequestException, ValueError) as e:
                # Keep the rows already written instead of crashing mid-crawl.
                print(f"获取评论时发生异常:{e}")
                break

            if not isinstance(json_data, dict):
                print("评论接口返回了无法识别的数据。")
                break

            for li in json_data.get('data') or []:
                user = li['user']['screen_name']
                text_raw = li['text_raw']
                formatted_created_at = convert_created_at(li['created_at'])
                print(user, ':', text_raw, formatted_created_at)
                csv_writer.writerow([user, text_raw, formatted_created_at])

            # A missing cursor is treated as "no more pages".
            next_max_id = json_data.get('max_id', 0)
            if next_max_id == max_id:
                # The cursor did not advance; stop rather than loop forever.
                break
            max_id = next_max_id


if __name__ == "__main__":
    weibo_id = input('请输入微博的id:').strip()
    weibo_mid = get_weibo_mid(weibo_id)
    if weibo_mid:
        csv_filename = f'weibo_{weibo_mid}.csv'
        get_weibo_comments(weibo_mid, csv_filename)
Pre:
前端调取脚本示例
Next:
表格处理
0
likes
55
Weibo
Wechat
Tencent Weibo
QQ Zone
RenRen
Footer
Table of contents