mirror of
				https://github.com/yuanwangokk-1/TV-BOX.git
				synced 2025-10-24 23:11:21 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			563 lines
		
	
	
		
			No EOL
		
	
	
		
			22 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			563 lines
		
	
	
		
			No EOL
		
	
	
		
			22 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| # -*- coding: utf-8 -*-
 | ||
| # 爱壹帆 - https://www.iyf.lv/
 | ||
| import re
 | ||
| import sys
 | ||
| import json
 | ||
| import time
 | ||
| from urllib.parse import quote, unquote
 | ||
| from pyquery import PyQuery as pq
 | ||
| sys.path.append('..')
 | ||
| from base.spider import Spider
 | ||
| 
 | ||
| class Spider(Spider):
 | ||
|     def init(self, extend=""):
 | ||
|         pass
 | ||
| 
 | ||
|     def getName(self):
 | ||
|         return "爱壹帆"
 | ||
| 
 | ||
|     def isVideoFormat(self, url):
 | ||
|         pass
 | ||
| 
 | ||
|     def manualVideoCheck(self):
 | ||
|         pass
 | ||
| 
 | ||
|     def destroy(self):
 | ||
|         pass
 | ||
| 
 | ||
|     host = 'https://www.iyf.lv'
 | ||
| 
 | ||
|     headers = {
 | ||
|         'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36',
 | ||
|         'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
 | ||
|         'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
 | ||
|         'Accept-Encoding': 'gzip, deflate, br',
 | ||
|         'DNT': '1',
 | ||
|         'Connection': 'keep-alive',
 | ||
|         'Upgrade-Insecure-Requests': '1',
 | ||
|     }
 | ||
| 
 | ||
|     def homeContent(self, filter):
 | ||
|         """获取首页内容和分类"""
 | ||
|         try:
 | ||
|             response = self.fetch_with_encoding(self.host, headers=self.headers)
 | ||
|             doc = self.getpq(response.text)
 | ||
| 
 | ||
|             result = {}
 | ||
|             classes = []
 | ||
| 
 | ||
|             # 获取分类导航 - 基于浏览器分析,分类链接包含/t/
 | ||
|             nav_items = doc('a[href*="/t/"]')
 | ||
|             for item in nav_items.items():
 | ||
|                 text = item.text().strip()
 | ||
|                 href = item.attr('href')
 | ||
|                 if text and href and '/t/' in href:
 | ||
|                     # 提取分类ID
 | ||
|                     type_id = href.split('/t/')[-1].rstrip('/')
 | ||
|                     if type_id.isdigit():
 | ||
|                         classes.append({
 | ||
|                             'type_name': text,
 | ||
|                             'type_id': type_id
 | ||
|                         })
 | ||
| 
 | ||
|             # 获取首页视频列表 - 优先查找包含图片的视频链接
 | ||
|             videos = []
 | ||
|             seen_ids = set()  # 用于去重
 | ||
| 
 | ||
|             # 优先查找包含图片的视频链接(主要的视频项)
 | ||
|             video_links = doc('a[href*="/iyftv/"]').filter(lambda _, e: pq(e).find('img').length > 0)
 | ||
| 
 | ||
|             for link in video_links.items():
 | ||
|                 try:
 | ||
|                     href = link.attr('href') or ''
 | ||
|                     if not href:
 | ||
|                         continue
 | ||
| 
 | ||
|                     # 提取视频ID
 | ||
|                     vod_id = href.split('/iyftv/')[-1].rstrip('/')
 | ||
|                     if not vod_id or vod_id in seen_ids:
 | ||
|                         continue
 | ||
| 
 | ||
|                     seen_ids.add(vod_id)  # 添加到已见集合
 | ||
| 
 | ||
|                     # 获取标题 - 优先从图片alt属性获取(最准确)
 | ||
|                     title = ''
 | ||
|                     img_elem = link.find('img')
 | ||
|                     if img_elem:
 | ||
|                         title = img_elem.attr('alt') or ''
 | ||
| 
 | ||
|                     # 如果图片alt为空,尝试其他方式
 | ||
|                     if not title:
 | ||
|                         title = link.attr('title') or ''
 | ||
|                     if not title:
 | ||
|                         # 从链接文本获取,但要过滤掉无关文本
 | ||
|                         link_text = link.text().strip()
 | ||
|                         if link_text and link_text not in ['正片', '详情', '播放', '观看']:
 | ||
|                             title = link_text
 | ||
| 
 | ||
|                     if not title:
 | ||
|                         continue
 | ||
| 
 | ||
|                     # 获取图片 - 优先获取data-original(真实图片),避免懒加载占位图
 | ||
|                     pic = ''
 | ||
|                     if img_elem:
 | ||
|                         pic = img_elem.attr('data-original') or img_elem.attr('data-src') or img_elem.attr('src') or ''
 | ||
|                         if pic and not pic.startswith('http'):
 | ||
|                             pic = self.host + pic if pic.startswith('/') else ''
 | ||
| 
 | ||
|                     # 获取备注信息 - 查找可能的备注元素
 | ||
|                     remarks = ''
 | ||
|                     # 查找父容器中的备注信息
 | ||
|                     parent = link.parent()
 | ||
|                     if parent:
 | ||
|                         # 查找集数信息
 | ||
|                         episode_elem = parent.find('.episode, .status, .note')
 | ||
|                         if episode_elem:
 | ||
|                             remarks = episode_elem.text().strip()
 | ||
|                         else:
 | ||
|                             # 查找包含"第"、"集"、"期"等关键字的文本
 | ||
|                             parent_text = parent.text()
 | ||
|                             import re
 | ||
|                             episode_match = re.search(r'第\d+[集期]|更新至|完结|正片', parent_text)
 | ||
|                             if episode_match:
 | ||
|                                 remarks = episode_match.group()
 | ||
| 
 | ||
|                     videos.append({
 | ||
|                         'vod_id': vod_id,
 | ||
|                         'vod_name': self.fix_encoding(title),
 | ||
|                         'vod_pic': pic,
 | ||
|                         'vod_year': '',
 | ||
|                         'vod_remarks': self.fix_encoding(remarks)
 | ||
|                     })
 | ||
|                 except Exception as e:
 | ||
|                     self.log(f"解析视频项时出错: {e}")
 | ||
|                     continue
 | ||
| 
 | ||
|             result['class'] = classes
 | ||
|             result['list'] = videos
 | ||
|             return result
 | ||
| 
 | ||
|         except Exception as e:
 | ||
|             self.log(f"获取首页内容时出错: {e}")
 | ||
|             return {'class': [], 'list': []}
 | ||
| 
 | ||
|     def homeVideoContent(self):
 | ||
|         """获取推荐视频"""
 | ||
|         return {'list': []}
 | ||
| 
 | ||
|     def categoryContent(self, tid, pg, filter, extend):
 | ||
|         """获取分类内容"""
 | ||
|         try:
 | ||
|             # 构建分类URL
 | ||
|             url = f"{self.host}/t/{tid}/"
 | ||
|             if int(pg) > 1:
 | ||
|                 url = f"{self.host}/t/{tid}/page/{pg}/"
 | ||
| 
 | ||
|             response = self.fetch_with_encoding(url, headers=self.headers)
 | ||
|             doc = self.getpq(response.text)
 | ||
| 
 | ||
|             # 获取视频列表 - 优先查找包含图片的视频链接
 | ||
|             videos = []
 | ||
|             seen_ids = set()  # 用于去重
 | ||
| 
 | ||
|             # 优先查找包含图片的视频链接(主要的视频项)
 | ||
|             video_links = doc('a[href*="/iyftv/"]').filter(lambda _, e: pq(e).find('img').length > 0)
 | ||
| 
 | ||
|             for link in video_links.items():
 | ||
|                 try:
 | ||
|                     href = link.attr('href') or ''
 | ||
|                     if not href:
 | ||
|                         continue
 | ||
| 
 | ||
|                     # 提取视频ID
 | ||
|                     vod_id = href.split('/iyftv/')[-1].rstrip('/')
 | ||
|                     if not vod_id or vod_id in seen_ids:
 | ||
|                         continue
 | ||
| 
 | ||
|                     seen_ids.add(vod_id)  # 添加到已见集合
 | ||
| 
 | ||
|                     # 获取标题 - 优先从图片alt属性获取(最准确)
 | ||
|                     title = ''
 | ||
|                     img_elem = link.find('img')
 | ||
|                     if img_elem:
 | ||
|                         title = img_elem.attr('alt') or ''
 | ||
| 
 | ||
|                     # 如果图片alt为空,尝试其他方式
 | ||
|                     if not title:
 | ||
|                         title = link.attr('title') or ''
 | ||
|                     if not title:
 | ||
|                         # 从链接文本获取,但要过滤掉无关文本
 | ||
|                         link_text = link.text().strip()
 | ||
|                         if link_text and link_text not in ['正片', '详情', '播放', '观看']:
 | ||
|                             title = link_text
 | ||
| 
 | ||
|                     if not title:
 | ||
|                         continue
 | ||
| 
 | ||
|                     # 获取图片 - 优先获取data-original(真实图片),避免懒加载占位图
 | ||
|                     pic = ''
 | ||
|                     if img_elem:
 | ||
|                         pic = img_elem.attr('data-original') or img_elem.attr('data-src') or img_elem.attr('src') or ''
 | ||
|                         if pic and not pic.startswith('http'):
 | ||
|                             pic = self.host + pic if pic.startswith('/') else ''
 | ||
| 
 | ||
|                     # 获取备注信息 - 查找集数或状态信息
 | ||
|                     remarks = ''
 | ||
|                     parent = link.parent()
 | ||
|                     if parent:
 | ||
|                         # 查找集数信息
 | ||
|                         episode_elem = parent.find('.episode, .status, .note')
 | ||
|                         if episode_elem:
 | ||
|                             remarks = episode_elem.text().strip()
 | ||
|                         else:
 | ||
|                             # 查找包含"第"、"集"、"期"等关键字的文本
 | ||
|                             parent_text = parent.text()
 | ||
|                             import re
 | ||
|                             episode_match = re.search(r'第\d+[集期]|更新至|完结|正片', parent_text)
 | ||
|                             if episode_match:
 | ||
|                                 remarks = episode_match.group()
 | ||
| 
 | ||
|                     videos.append({
 | ||
|                         'vod_id': vod_id,
 | ||
|                         'vod_name': self.fix_encoding(title),
 | ||
|                         'vod_pic': pic,
 | ||
|                         'vod_year': '',
 | ||
|                         'vod_remarks': self.fix_encoding(remarks)
 | ||
|                     })
 | ||
|                 except Exception as e:
 | ||
|                     self.log(f"解析分类视频项时出错: {e}")
 | ||
|                     continue
 | ||
| 
 | ||
|             result = {
 | ||
|                 'list': videos,
 | ||
|                 'page': pg,
 | ||
|                 'pagecount': 9999,  # 设置一个较大的值
 | ||
|                 'limit': 80,
 | ||
|                 'total': 999999
 | ||
|             }
 | ||
|             return result
 | ||
| 
 | ||
|         except Exception as e:
 | ||
|             self.log(f"获取分类内容时出错: {e}")
 | ||
|             return {'list': [], 'page': pg, 'pagecount': 1, 'limit': 80, 'total': 0}
 | ||
| 
 | ||
|     def detailContent(self, ids):
 | ||
|         """获取视频详情"""
 | ||
|         try:
 | ||
|             vod_id = ids[0]
 | ||
|             url = f"{self.host}/iyftv/{vod_id}/"
 | ||
| 
 | ||
|             response = self.fetch_with_encoding(url, headers=self.headers)
 | ||
|             doc = self.getpq(response.text)
 | ||
| 
 | ||
|             # 获取标题
 | ||
|             title_elem = doc('h1')
 | ||
|             title = self.fix_encoding(title_elem.text()) if title_elem else ''
 | ||
| 
 | ||
|             # 获取视频信息 - 查找可能的简介元素
 | ||
|             content = ''
 | ||
|             info_selectors = ['.module-info', '.video-info', '.content', '.description', '.intro']
 | ||
|             for selector in info_selectors:
 | ||
|                 info_elem = doc(selector)
 | ||
|                 if info_elem:
 | ||
|                     content = self.fix_encoding(info_elem.text())
 | ||
|                     break
 | ||
| 
 | ||
|             # 获取播放源和播放列表
 | ||
|             play_from = []
 | ||
|             play_url = []
 | ||
| 
 | ||
|             # 查找播放源标签 - 基于浏览器分析,可能是.module-tab-item
 | ||
|             tab_selectors = ['.module-tab-item', '.tab-item', '.play-source', '.source-tab']
 | ||
|             playlist_selectors = ['.module-play-list', '.play-list', '.episode-list']
 | ||
| 
 | ||
|             tabs = None
 | ||
|             playlists = None
 | ||
| 
 | ||
|             for selector in tab_selectors:
 | ||
|                 tabs = doc(selector)
 | ||
|                 if tabs:
 | ||
|                     break
 | ||
| 
 | ||
|             for selector in playlist_selectors:
 | ||
|                 playlists = doc(selector)
 | ||
|                 if playlists:
 | ||
|                     break
 | ||
| 
 | ||
|             if tabs and playlists:
 | ||
|                 for i, tab in enumerate(tabs.items()):
 | ||
|                     # 获取播放源名称
 | ||
|                     source_name = self.fix_encoding(tab.text().strip())
 | ||
| 
 | ||
|                     if source_name:
 | ||
|                         play_from.append(source_name)
 | ||
| 
 | ||
|                         # 获取对应的播放列表
 | ||
|                         episodes = []
 | ||
|                         if i < len(playlists):
 | ||
|                             episode_items = playlists.eq(i).find('a')
 | ||
|                             for ep in episode_items.items():
 | ||
|                                 ep_title = self.fix_encoding(ep.text().strip())
 | ||
|                                 ep_href = ep.attr('href')
 | ||
|                                 if ep_title and ep_href:
 | ||
|                                     episodes.append(f"{ep_title}${ep_href}")
 | ||
| 
 | ||
|                         play_url.append('#'.join(episodes))
 | ||
| 
 | ||
|             vod = {
 | ||
|                 'vod_id': vod_id,
 | ||
|                 'vod_name': title,
 | ||
|                 'vod_pic': '',
 | ||
|                 'vod_year': '',
 | ||
|                 'vod_remarks': '',
 | ||
|                 'vod_actor': '',
 | ||
|                 'vod_director': '',
 | ||
|                 'vod_content': content,
 | ||
|                 'vod_play_from': '$$$'.join(play_from),
 | ||
|                 'vod_play_url': '$$$'.join(play_url)
 | ||
|             }
 | ||
| 
 | ||
|             return {'list': [vod]}
 | ||
| 
 | ||
|         except Exception as e:
 | ||
|             self.log(f"获取视频详情时出错: {e}")
 | ||
|             return {'list': []}
 | ||
| 
 | ||
|     def searchContent(self, key, quick, pg="1"):
 | ||
|         """搜索内容"""
 | ||
|         try:
 | ||
|             # 使用正确的搜索URL格式
 | ||
|             search_url = f"{self.host}/s/-------------/"
 | ||
|             params = {'wd': key}
 | ||
| 
 | ||
|             response = self.fetch_with_encoding(search_url, params=params, headers=self.headers)
 | ||
|             doc = self.getpq(response.text)
 | ||
| 
 | ||
|             # 获取搜索结果 - 基于搜索页面的实际结构
 | ||
|             videos = []
 | ||
|             seen_ids = set()  # 用于去重
 | ||
| 
 | ||
|             # 搜索页面的结构:每个视频在一个容器中,包含图片链接和标题链接
 | ||
|             # 优先查找包含图片的视频链接(主要的视频项)
 | ||
|             video_containers = doc('a[href*="/iyftv/"]').filter(lambda _, e: pq(e).find('img').length > 0)
 | ||
| 
 | ||
|             for link in video_containers.items():
 | ||
|                 try:
 | ||
|                     href = link.attr('href') or ''
 | ||
|                     if not href:
 | ||
|                         continue
 | ||
| 
 | ||
|                     # 提取视频ID
 | ||
|                     vod_id = href.split('/iyftv/')[-1].rstrip('/')
 | ||
|                     if not vod_id or vod_id in seen_ids:
 | ||
|                         continue
 | ||
| 
 | ||
|                     seen_ids.add(vod_id)  # 添加到已见集合
 | ||
| 
 | ||
|                     # 获取标题 - 优先从图片alt属性获取
 | ||
|                     title = ''
 | ||
|                     img_elem = link.find('img')
 | ||
|                     if img_elem:
 | ||
|                         title = img_elem.attr('alt') or ''
 | ||
| 
 | ||
|                     # 如果图片alt为空,查找同级或父级的标题链接
 | ||
|                     if not title:
 | ||
|                         # 查找父容器中的标题链接
 | ||
|                         parent_container = link.parent()
 | ||
|                         if parent_container:
 | ||
|                             title_link = parent_container.find(f'a[href="/iyftv/{vod_id}/"] strong')
 | ||
|                             if title_link:
 | ||
|                                 title = title_link.text().strip()
 | ||
|                             else:
 | ||
|                                 # 查找其他可能的标题元素
 | ||
|                                 title_elem = parent_container.find(f'a[href="/iyftv/{vod_id}/"]').not_(link)
 | ||
|                                 if title_elem:
 | ||
|                                     title = title_elem.text().strip()
 | ||
| 
 | ||
|                     if not title:
 | ||
|                         continue
 | ||
| 
 | ||
|                     # 获取图片 - 优先获取data-original(真实图片),避免懒加载占位图
 | ||
|                     pic = ''
 | ||
|                     if img_elem:
 | ||
|                         pic = img_elem.attr('data-original') or img_elem.attr('data-src') or img_elem.attr('src') or ''
 | ||
|                         if pic and not pic.startswith('http'):
 | ||
|                             pic = self.host + pic if pic.startswith('/') else ''
 | ||
| 
 | ||
|                     # 获取备注信息 - 查找集数或状态信息
 | ||
|                     remarks = ''
 | ||
|                     parent_container = link.parent()
 | ||
|                     if parent_container:
 | ||
|                         # 查找集数信息(通常在图片上方的标签中)
 | ||
|                         episode_elem = parent_container.find('.episode, .status, .note')
 | ||
|                         if episode_elem:
 | ||
|                             remarks = episode_elem.text().strip()
 | ||
|                         else:
 | ||
|                             # 查找包含"第"、"集"、"期"等关键字的文本
 | ||
|                             parent_text = parent_container.text()
 | ||
|                             import re
 | ||
|                             episode_match = re.search(r'第\d+[集期]|更新至|完结|正片', parent_text)
 | ||
|                             if episode_match:
 | ||
|                                 remarks = episode_match.group()
 | ||
| 
 | ||
|                     videos.append({
 | ||
|                         'vod_id': vod_id,
 | ||
|                         'vod_name': self.fix_encoding(title),
 | ||
|                         'vod_pic': pic,
 | ||
|                         'vod_year': '',
 | ||
|                         'vod_remarks': self.fix_encoding(remarks)
 | ||
|                     })
 | ||
|                 except Exception as e:
 | ||
|                     self.log(f"解析搜索结果时出错: {e}")
 | ||
|                     continue
 | ||
| 
 | ||
|             return {'list': videos, 'page': pg}
 | ||
| 
 | ||
|         except Exception as e:
 | ||
|             self.log(f"搜索时出错: {e}")
 | ||
|             return {'list': [], 'page': pg}
 | ||
| 
 | ||
|     def playerContent(self, flag, id, vipFlags):
 | ||
|         """获取播放地址"""
 | ||
|         try:
 | ||
|             # 播放页面URL
 | ||
|             play_url = f"{self.host}{id}"
 | ||
| 
 | ||
|             response = self.fetch_with_encoding(play_url, headers=self.headers)
 | ||
|             doc = self.getpq(response.text)
 | ||
| 
 | ||
|             # 查找播放器配置
 | ||
|             scripts = doc('script')
 | ||
|             for script in scripts.items():
 | ||
|                 script_text = script.text()
 | ||
|                 if 'player' in script_text and ('url' in script_text):
 | ||
|                     # 尝试提取播放地址
 | ||
|                     url_match = re.search(r'"url"\s*:\s*"([^"]+)"', script_text)
 | ||
|                     if url_match:
 | ||
|                         video_url = url_match.group(1)
 | ||
|                         return {
 | ||
|                             'parse': 0,
 | ||
|                             'url': video_url,
 | ||
|                             'header': self.headers
 | ||
|                         }
 | ||
| 
 | ||
|             # 如果没有找到直接播放地址,返回播放页面让系统解析
 | ||
|             return {
 | ||
|                 'parse': 1,
 | ||
|                 'url': play_url,
 | ||
|                 'header': self.headers
 | ||
|             }
 | ||
| 
 | ||
|         except Exception as e:
 | ||
|             self.log(f"获取播放地址时出错: {e}")
 | ||
|             return {
 | ||
|                 'parse': 1,
 | ||
|                 'url': f"{self.host}{id}",
 | ||
|                 'header': self.headers
 | ||
|             }
 | ||
| 
 | ||
|     def localProxy(self, param):
 | ||
|         pass
 | ||
| 
 | ||
|     def fix_encoding(self, text):
 | ||
|         """修复UTF-8编码问题 - 加强版"""
 | ||
|         if not text:
 | ||
|             return text
 | ||
| 
 | ||
|         try:
 | ||
|             # 扩展的乱码特征检测
 | ||
|             garbled_patterns = [
 | ||
|                 # 常见的UTF-8乱码模式
 | ||
|                 '\u00e4\u00b8', '\u00e5', '\u00e6', '\u00e7', '\u00e8', '\u00e9',
 | ||
|                 '\u00c3\u00a4', '\u00c3\u00a5', '\u00c3\u00a6', '\u00c3\u00a7',
 | ||
|                 '\u00ef\u00bc', '\u00e2\u0080', '\u00e2\u0084',
 | ||
|                 # 更多乱码模式
 | ||
|                 '\u00c2\u00a0', '\u00c2\u00b7', '\u00c2\u00bb',
 | ||
|                 '\u00e2\u0082', '\u00e2\u0086', '\u00e2\u0088',
 | ||
|                 # 特殊字符乱码
 | ||
|                 '\u00c3\u0097', '\u00c3\u00b7', '\u00c2\u00b1'
 | ||
|             ]
 | ||
| 
 | ||
|             has_garbled = any(pattern in text for pattern in garbled_patterns)
 | ||
| 
 | ||
|             # 额外检查:如果文本包含大量非ASCII字符但没有中文,可能是乱码
 | ||
|             if not has_garbled:
 | ||
|                 non_ascii_count = sum(1 for c in text if ord(c) > 127)
 | ||
|                 chinese_count = sum(1 for c in text if '\u4e00' <= c <= '\u9fff')
 | ||
|                 if non_ascii_count > 0 and chinese_count == 0 and non_ascii_count > len(text) * 0.3:
 | ||
|                     has_garbled = True
 | ||
| 
 | ||
|             if has_garbled:
 | ||
|                 self.log(f"检测到编码问题,尝试修复: {text[:50]}...")
 | ||
| 
 | ||
|                 # 方法1: 尝试Latin1->UTF-8转换
 | ||
|                 try:
 | ||
|                     fixed = text.encode('latin1').decode('utf-8')
 | ||
|                     # 检查是否修复成功(包含中文字符且减少了乱码字符)
 | ||
|                     if re.search(r'[\u4e00-\u9fff]', fixed):
 | ||
|                         self.log("使用Latin1->UTF-8修复成功")
 | ||
|                         return fixed
 | ||
|                 except Exception as e:
 | ||
|                     self.log(f"Latin1->UTF-8修复失败: {e}")
 | ||
| 
 | ||
|                 # 方法2: 尝试其他编码转换
 | ||
|                 encodings = ['cp1252', 'iso-8859-1', 'windows-1252']
 | ||
|                 for encoding in encodings:
 | ||
|                     try:
 | ||
|                         fixed = text.encode(encoding).decode('utf-8')
 | ||
|                         if re.search(r'[\u4e00-\u9fff]', fixed):
 | ||
|                             self.log(f"使用{encoding}->UTF-8修复成功")
 | ||
|                             return fixed
 | ||
|                     except:
 | ||
|                         continue
 | ||
| 
 | ||
|                 # 方法3: 尝试直接处理常见的乱码替换
 | ||
|                 try:
 | ||
|                     # 常见乱码字符替换表
 | ||
|                     replacements = {
 | ||
|                         '\u00e4\u00b8\u00ad': '中',
 | ||
|                         '\u00e6\u0096\u0087': '文',
 | ||
|                         '\u00e5\u00bd\u00b1': '影',
 | ||
|                         '\u00e8\u00a7\u0086': '视',
 | ||
|                         '\u00e9\u00a2\u0091': '频',
 | ||
|                     }
 | ||
| 
 | ||
|                     fixed = text
 | ||
|                     for garbled, correct in replacements.items():
 | ||
|                         fixed = fixed.replace(garbled, correct)
 | ||
| 
 | ||
|                     if fixed != text and re.search(r'[\u4e00-\u9fff]', fixed):
 | ||
|                         self.log("使用字符替换修复成功")
 | ||
|                         return fixed
 | ||
|                 except:
 | ||
|                     pass
 | ||
| 
 | ||
|                 self.log("编码修复失败,返回原文本")
 | ||
| 
 | ||
|             return text
 | ||
| 
 | ||
|         except Exception as e:
 | ||
|             self.log(f"编码修复异常: {e}")
 | ||
|             return text
 | ||
| 
 | ||
|     def fetch_with_encoding(self, url, **kwargs):
 | ||
|         """带编码处理的请求方法"""
 | ||
|         try:
 | ||
|             response = self.fetch(url, **kwargs)
 | ||
|             # 确保使用UTF-8编码
 | ||
|             response.encoding = 'utf-8'
 | ||
|             return response
 | ||
|         except Exception as e:
 | ||
|             self.log(f"请求失败: {e}")
 | ||
|             raise
 | ||
| 
 | ||
|     def getpq(self, text):
 | ||
|         """安全的pyquery解析"""
 | ||
|         try:
 | ||
|             return pq(text)
 | ||
|         except Exception as e:
 | ||
|             self.log(f"pyquery解析出错: {e}")
 | ||
|             try:
 | ||
|                 return pq(text.encode('utf-8'))
 | ||
|             except:
 | ||
|                 return pq('') |