Mirror of https://github.com/yuanwangokk-1/TV-BOX.git
Synced 2025-10-24 23:11:21 +00:00
599 lines · 21 KiB · Python
# coding=utf-8
|
||
# !/usr/bin/python
|
||
import sys
|
||
sys.path.append('..')
|
||
from base.spider import Spider
|
||
import json
|
||
import time
|
||
import urllib.parse
|
||
import re
|
||
import base64
|
||
|
||
class Spider(Spider):
|
||
|
||
def getName(self):
|
||
return "两个BT"
|
||
|
||
def init(self, extend=""):
|
||
self.host = "https://www.bttwoo.com"
|
||
self.headers = {
|
||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
|
||
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
|
||
'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
|
||
'Connection': 'keep-alive',
|
||
'Referer': self.host
|
||
}
|
||
self.log(f"两个BT爬虫初始化完成,主站: {self.host}")
|
||
|
||
def isVideoFormat(self, url):
|
||
pass
|
||
|
||
def manualVideoCheck(self):
|
||
pass
|
||
|
||
def homeContent(self, filter):
|
||
"""首页内容 - TVBox标准实现"""
|
||
result = {}
|
||
|
||
# 1. 定义分类 - 基于实际网站结构
|
||
classes = [
|
||
{'type_id': 'movie_bt_tags/xiju', 'type_name': '喜剧'},
|
||
{'type_id': 'movie_bt_tags/aiqing', 'type_name': '爱情'},
|
||
{'type_id': 'movie_bt_tags/adt', 'type_name': '冒险'},
|
||
{'type_id': 'movie_bt_tags/at', 'type_name': '动作'},
|
||
{'type_id': 'movie_bt_tags/donghua', 'type_name': '动画'},
|
||
{'type_id': 'movie_bt_tags/qihuan', 'type_name': '奇幻'},
|
||
{'type_id': 'movie_bt_tags/xuanni', 'type_name': '悬疑'},
|
||
{'type_id': 'movie_bt_tags/kehuan', 'type_name': '科幻'},
|
||
{'type_id': 'movie_bt_tags/juqing', 'type_name': '剧情'},
|
||
{'type_id': 'movie_bt_tags/kongbu', 'type_name': '恐怖'},
|
||
{'type_id': 'meiju', 'type_name': '美剧'},
|
||
{'type_id': 'gf', 'type_name': '高分电影'}
|
||
]
|
||
result['class'] = classes
|
||
|
||
# 2. 添加筛选配置
|
||
result['filters'] = self._get_filters()
|
||
|
||
# 3. 获取首页推荐内容
|
||
try:
|
||
rsp = self.fetch(self.host, headers=self.headers)
|
||
doc = self.html(rsp.text)
|
||
videos = self._get_videos(doc, limit=50)
|
||
result['list'] = videos
|
||
except Exception as e:
|
||
self.log(f"首页获取出错: {str(e)}")
|
||
result['list'] = []
|
||
|
||
return result
|
||
|
||
def homeVideoContent(self):
|
||
"""兼容性方法 - 提供分类定义"""
|
||
return {
|
||
'class': [
|
||
{'type_id': 'movie_bt_tags/xiju', 'type_name': '喜剧'},
|
||
{'type_id': 'movie_bt_tags/aiqing', 'type_name': '爱情'},
|
||
{'type_id': 'movie_bt_tags/adt', 'type_name': '冒险'},
|
||
{'type_id': 'movie_bt_tags/at', 'type_name': '动作'},
|
||
{'type_id': 'movie_bt_tags/donghua', 'type_name': '动画'},
|
||
{'type_id': 'movie_bt_tags/qihuan', 'type_name': '奇幻'},
|
||
{'type_id': 'movie_bt_tags/xuanni', 'type_name': '悬疑'},
|
||
{'type_id': 'movie_bt_tags/kehuan', 'type_name': '科幻'},
|
||
{'type_id': 'movie_bt_tags/juqing', 'type_name': '剧情'},
|
||
{'type_id': 'movie_bt_tags/kongbu', 'type_name': '恐怖'},
|
||
{'type_id': 'meiju', 'type_name': '美剧'},
|
||
{'type_id': 'gf', 'type_name': '高分电影'}
|
||
],
|
||
'filters': self._get_filters()
|
||
}
|
||
|
||
def categoryContent(self, tid, pg, filter, extend):
|
||
"""分类内容 - 支持筛选功能"""
|
||
try:
|
||
# 合并filter和extend参数
|
||
if filter and isinstance(filter, dict):
|
||
if not extend:
|
||
extend = {}
|
||
extend.update(filter)
|
||
|
||
self.log(f"分类请求: tid={tid}, pg={pg}, extend={extend}")
|
||
|
||
url = self._build_url(tid, pg, extend)
|
||
if not url:
|
||
return {'list': []}
|
||
|
||
rsp = self.fetch(url, headers=self.headers)
|
||
doc = self.html(rsp.text)
|
||
videos = self._get_videos(doc, limit=20)
|
||
|
||
return {
|
||
'list': videos,
|
||
'page': int(pg),
|
||
'pagecount': 999,
|
||
'limit': 20,
|
||
'total': 19980
|
||
}
|
||
except Exception as e:
|
||
self.log(f"分类内容获取出错: {str(e)}")
|
||
return {'list': []}
|
||
|
||
def searchContent(self, key, quick, pg="1"):
|
||
"""搜索功能 - 智能过滤"""
|
||
try:
|
||
search_url = f"{self.host}/xssssearch?q={urllib.parse.quote(key)}"
|
||
if pg and pg != "1":
|
||
search_url += f"&p={pg}"
|
||
|
||
self.log(f"搜索URL: {search_url}")
|
||
rsp = self.fetch(search_url, headers=self.headers)
|
||
doc = self.html(rsp.text)
|
||
|
||
videos = []
|
||
seen_ids = set()
|
||
|
||
# 搜索结果选择器
|
||
elements = doc.xpath('//li[contains(@class,"") and .//a[contains(@href,"/movie/")]]')
|
||
self.log(f"找到 {len(elements)} 个搜索结果元素")
|
||
|
||
for elem in elements:
|
||
video = self._extract_video_info(elem, is_search=True)
|
||
if video and video['vod_id'] not in seen_ids:
|
||
# 添加相关性检查
|
||
if self._is_relevant_search_result(video['vod_name'], key):
|
||
videos.append(video)
|
||
seen_ids.add(video['vod_id'])
|
||
self.log(f"✅ 相关视频: {video['vod_name']} (ID: {video['vod_id']})")
|
||
else:
|
||
self.log(f"❌ 过滤无关: {video['vod_name']} (搜索: {key})")
|
||
|
||
self.log(f"最终搜索结果: {len(videos)} 个视频")
|
||
return {'list': videos}
|
||
except Exception as e:
|
||
self.log(f"搜索出错: {str(e)}")
|
||
return {'list': []}
|
||
|
||
def detailContent(self, ids):
|
||
"""详情页面"""
|
||
try:
|
||
vid = ids[0]
|
||
detail_url = f"{self.host}/movie/{vid}.html"
|
||
rsp = self.fetch(detail_url, headers=self.headers)
|
||
doc = self.html(rsp.text)
|
||
|
||
video_info = self._get_detail(doc, vid)
|
||
return {'list': [video_info]} if video_info else {'list': []}
|
||
except Exception as e:
|
||
self.log(f"详情获取出错: {str(e)}")
|
||
return {'list': []}
|
||
|
||
def playerContent(self, flag, id, vipFlags):
|
||
"""播放链接"""
|
||
try:
|
||
self.log(f"获取播放链接: flag={flag}, id={id}")
|
||
|
||
# 解码Base64播放ID
|
||
try:
|
||
decoded_id = base64.b64decode(id).decode('utf-8')
|
||
self.log(f"解码播放ID: {decoded_id}")
|
||
except:
|
||
decoded_id = id
|
||
|
||
play_url = f"{self.host}/v_play/{id}.html"
|
||
|
||
# 返回播放页面URL,让播放器处理
|
||
return {'parse': 1, 'playUrl': '', 'url': play_url}
|
||
except Exception as e:
|
||
self.log(f"播放链接获取出错: {str(e)}")
|
||
return {'parse': 1, 'playUrl': '', 'url': f"{self.host}/v_play/{id}.html"}
|
||
|
||
# ========== Helper methods ==========
|
||
|
||
def _get_filters(self):
|
||
"""获取筛选配置 - TVBox兼容版"""
|
||
base_filters = [
|
||
{
|
||
'key': 'area',
|
||
'name': '地区',
|
||
'value': [
|
||
{'n': '全部', 'v': ''},
|
||
{'n': '中国大陆', 'v': '中国大陆'},
|
||
{'n': '美国', 'v': '美国'},
|
||
{'n': '韩国', 'v': '韩国'},
|
||
{'n': '日本', 'v': '日本'},
|
||
{'n': '英国', 'v': '英国'},
|
||
{'n': '法国', 'v': '法国'},
|
||
{'n': '德国', 'v': '德国'},
|
||
{'n': '其他', 'v': '其他'}
|
||
]
|
||
},
|
||
{
|
||
'key': 'year',
|
||
'name': '年份',
|
||
'value': [
|
||
{'n': '全部', 'v': ''},
|
||
{'n': '2025', 'v': '2025'},
|
||
{'n': '2024', 'v': '2024'},
|
||
{'n': '2023', 'v': '2023'},
|
||
{'n': '2022', 'v': '2022'},
|
||
{'n': '2021', 'v': '2021'},
|
||
{'n': '2020', 'v': '2020'},
|
||
{'n': '2019', 'v': '2019'},
|
||
{'n': '2018', 'v': '2018'}
|
||
]
|
||
}
|
||
]
|
||
|
||
# 为每个分类提供筛选配置
|
||
filters = {}
|
||
category_ids = [
|
||
'movie_bt_tags/xiju', 'movie_bt_tags/aiqing', 'movie_bt_tags/adt',
|
||
'movie_bt_tags/at', 'movie_bt_tags/donghua', 'movie_bt_tags/qihuan',
|
||
'movie_bt_tags/xuanni', 'movie_bt_tags/kehuan', 'movie_bt_tags/juqing',
|
||
'movie_bt_tags/kongbu', 'meiju', 'gf'
|
||
]
|
||
|
||
for category_id in category_ids:
|
||
filters[category_id] = base_filters
|
||
|
||
return filters
|
||
|
||
def _build_url(self, tid, pg, extend):
|
||
"""构建URL - 支持筛选"""
|
||
try:
|
||
# 基础分类URL映射
|
||
if tid.startswith('movie_bt_tags/'):
|
||
url = f"{self.host}/{tid}"
|
||
elif tid == 'meiju':
|
||
url = f"{self.host}/meiju"
|
||
elif tid == 'gf':
|
||
url = f"{self.host}/gf"
|
||
else:
|
||
url = f"{self.host}/{tid}"
|
||
|
||
# 添加分页
|
||
if pg and pg != '1':
|
||
if '?' in url:
|
||
url += f"&paged={pg}"
|
||
else:
|
||
url += f"?paged={pg}"
|
||
|
||
return url
|
||
except Exception as e:
|
||
self.log(f"构建URL出错: {str(e)}")
|
||
return f"{self.host}/movie_bt_tags/xiju"
|
||
|
||
def _get_videos(self, doc, limit=None):
|
||
"""获取视频列表"""
|
||
try:
|
||
videos = []
|
||
seen_ids = set()
|
||
|
||
# 尝试多种选择器
|
||
selectors = [
|
||
'//li[.//a[contains(@href,"/movie/")]]',
|
||
'//div[contains(@class,"item")]//li[.//a[contains(@href,"/movie/")]]'
|
||
]
|
||
|
||
for selector in selectors:
|
||
elements = doc.xpath(selector)
|
||
if elements:
|
||
for elem in elements:
|
||
video = self._extract_video_info(elem)
|
||
if video and video['vod_id'] not in seen_ids:
|
||
videos.append(video)
|
||
seen_ids.add(video['vod_id'])
|
||
break
|
||
|
||
return videos[:limit] if limit and videos else videos
|
||
except Exception as e:
|
||
self.log(f"获取视频列表出错: {str(e)}")
|
||
return []
|
||
|
||
def _extract_video_info(self, element, is_search=False):
|
||
"""提取视频信息"""
|
||
try:
|
||
# 提取链接
|
||
links = element.xpath('.//a[contains(@href,"/movie/")]/@href')
|
||
if not links:
|
||
return None
|
||
|
||
link = links[0]
|
||
if link.startswith('/'):
|
||
link = self.host + link
|
||
|
||
vod_id = self.regStr(r'/movie/(\d+)\.html', link)
|
||
if not vod_id:
|
||
return None
|
||
|
||
# 提取标题
|
||
title_selectors = [
|
||
'.//h3/a/text()',
|
||
'.//h3/text()',
|
||
'.//a/@title',
|
||
'.//a/text()'
|
||
]
|
||
title = ''
|
||
for selector in title_selectors:
|
||
titles = element.xpath(selector)
|
||
for t in titles:
|
||
if t and t.strip() and len(t.strip()) > 1:
|
||
title = t.strip()
|
||
break
|
||
if title:
|
||
break
|
||
|
||
if not title:
|
||
return None
|
||
|
||
# 提取图片
|
||
pic = self._extract_image(element, is_search, vod_id)
|
||
|
||
# 提取备注
|
||
remarks = self._extract_remarks(element)
|
||
|
||
return {
|
||
'vod_id': vod_id,
|
||
'vod_name': title,
|
||
'vod_pic': pic,
|
||
'vod_remarks': remarks,
|
||
'vod_year': ''
|
||
}
|
||
except Exception as e:
|
||
self.log(f"提取视频信息出错: {str(e)}")
|
||
return None
|
||
|
||
def _extract_image(self, element, is_search=False, vod_id=None):
|
||
"""图片提取 - 处理懒加载"""
|
||
pic_selectors = [
|
||
'.//img/@data-original',
|
||
'.//img/@data-src',
|
||
'.//img/@src'
|
||
]
|
||
|
||
for selector in pic_selectors:
|
||
pics = element.xpath(selector)
|
||
for p in pics:
|
||
# 跳过懒加载占位符
|
||
if (p and not p.endswith('blank.gif') and
|
||
not p.startswith('data:image/') and 'base64' not in p):
|
||
if p.startswith('//'):
|
||
return 'https:' + p
|
||
elif p.startswith('/'):
|
||
return self.host + p
|
||
elif p.startswith('http'):
|
||
return p
|
||
|
||
# 搜索页面特殊处理:从详情页面获取
|
||
if is_search and vod_id:
|
||
return self._get_image_from_detail(vod_id)
|
||
|
||
return ''
|
||
|
||
def _extract_remarks(self, element):
|
||
"""提取备注信息"""
|
||
remarks_selectors = [
|
||
'.//span[contains(@class,"rating")]/text()',
|
||
'.//div[contains(@class,"rating")]/text()',
|
||
'.//span[contains(@class,"status")]/text()',
|
||
'.//div[contains(@class,"status")]/text()',
|
||
'.//span[contains(text(),"集")]/text()',
|
||
'.//span[contains(text(),"1080p")]/text()',
|
||
'.//span[contains(text(),"HD")]/text()'
|
||
]
|
||
|
||
for selector in remarks_selectors:
|
||
remarks_list = element.xpath(selector)
|
||
for r in remarks_list:
|
||
if r and r.strip():
|
||
return r.strip()
|
||
|
||
return ''
|
||
|
||
def _get_image_from_detail(self, vod_id):
|
||
"""从详情页面获取图片"""
|
||
try:
|
||
detail_url = f"{self.host}/movie/{vod_id}.html"
|
||
rsp = self.fetch(detail_url, headers=self.headers)
|
||
doc = self.html(rsp.text)
|
||
|
||
# 详情页图片选择器
|
||
pic_selectors = [
|
||
'//img[contains(@class,"poster")]/@src',
|
||
'//div[contains(@class,"poster")]//img/@src',
|
||
'//img[contains(@alt,"")]/@src'
|
||
]
|
||
|
||
for selector in pic_selectors:
|
||
pics = doc.xpath(selector)
|
||
for p in pics:
|
||
if p and not p.endswith('blank.gif'):
|
||
if p.startswith('//'):
|
||
return 'https:' + p
|
||
elif p.startswith('/'):
|
||
return self.host + p
|
||
elif p.startswith('http'):
|
||
return p
|
||
except:
|
||
pass
|
||
|
||
return ''
|
||
|
||
def _is_relevant_search_result(self, title, search_key):
|
||
"""检查搜索结果是否与搜索关键词相关"""
|
||
if not title or not search_key:
|
||
return False
|
||
|
||
title_lower = title.lower()
|
||
search_key_lower = search_key.lower()
|
||
|
||
# 直接包含搜索关键词的肯定相关
|
||
if search_key_lower in title_lower:
|
||
return True
|
||
|
||
# 字符匹配
|
||
search_chars = set(search_key_lower.replace(' ', ''))
|
||
title_chars = set(title_lower.replace(' ', ''))
|
||
|
||
if len(search_chars) > 0:
|
||
match_ratio = len(search_chars & title_chars) / len(search_chars)
|
||
if match_ratio >= 0.6:
|
||
return True
|
||
|
||
# 短搜索词要求严格匹配
|
||
if len(search_key_lower) <= 2:
|
||
return search_key_lower in title_lower
|
||
|
||
return False
|
||
|
||
    def _get_detail(self, doc, vod_id):
        """Parse a detail-page document into a single vod dict.

        Extracts title, poster, synopsis, cast, director and play
        sources, each via an ordered list of fallback XPath selectors.
        Returns None if parsing raises.
        """
        try:
            # Title: first non-empty text among h1 / h2 / <title>.
            title_selectors = [
                '//h1/text()',
                '//h2/text()',
                '//title/text()'
            ]
            title = ''
            for selector in title_selectors:
                titles = doc.xpath(selector)
                for t in titles:
                    if t and t.strip():
                        title = t.strip()
                        break
                if title:
                    break

            # Poster: first non-placeholder image. Note the inner break
            # fires on the first non-blank.gif candidate even when it
            # matches none of the //, /, http prefixes — pic then stays
            # '' and the next selector is tried.
            pic_selectors = [
                '//img[contains(@class,"poster")]/@src',
                '//div[contains(@class,"poster")]//img/@src',
                '//img/@src'
            ]
            pic = ''
            for selector in pic_selectors:
                pics = doc.xpath(selector)
                for p in pics:
                    if p and not p.endswith('blank.gif'):
                        if p.startswith('//'):
                            pic = 'https:' + p
                        elif p.startswith('/'):
                            pic = self.host + p
                        elif p.startswith('http'):
                            pic = p
                        break
                if pic:
                    break

            # Synopsis: join all text fragments of the first matching block.
            desc_selectors = [
                '//div[contains(@class,"intro")]//text()',
                '//div[contains(@class,"description")]//text()',
                '//p[contains(@class,"desc")]//text()'
            ]
            desc = ''
            for selector in desc_selectors:
                descs = doc.xpath(selector)
                desc_parts = []
                for d in descs:
                    if d and d.strip():
                        desc_parts.append(d.strip())
                if desc_parts:
                    desc = ' '.join(desc_parts)
                    break

            # Cast: keep only fragments literally containing "主演"
            # ("starring"), then strip that label (fullwidth colon form
            # first, then the bare word).
            actor_selectors = [
                '//li[contains(text(),"主演")]/text()',
                '//span[contains(text(),"主演")]/following-sibling::text()',
                '//div[contains(@class,"actor")]//text()'
            ]
            actor = ''
            for selector in actor_selectors:
                actors = doc.xpath(selector)
                for a in actors:
                    if a and a.strip() and '主演' in a:
                        actor = a.strip().replace('主演:', '').replace('主演', '')
                        break
                if actor:
                    break

            # Director: same pattern with the "导演" ("director") label.
            director_selectors = [
                '//li[contains(text(),"导演")]/text()',
                '//span[contains(text(),"导演")]/following-sibling::text()',
                '//div[contains(@class,"director")]//text()'
            ]
            director = ''
            for selector in director_selectors:
                directors = doc.xpath(selector)
                for d in directors:
                    if d and d.strip() and '导演' in d:
                        director = d.strip().replace('导演:', '').replace('导演', '')
                        break
                if director:
                    break

            # Play sources: list of {'name', 'episodes'} dicts.
            play_sources = self._parse_play_sources(doc, vod_id)

            # TVBox detail shape: play-from names and episode strings are
            # joined source-wise with the '$$$' separator.
            return {
                'vod_id': vod_id,
                'vod_name': title,
                'vod_pic': pic,
                'type_name': '',
                'vod_year': '',
                'vod_area': '',
                'vod_remarks': '',
                'vod_actor': actor,
                'vod_director': director,
                'vod_content': desc,
                'vod_play_from': '$$$'.join([source['name'] for source in play_sources]),
                'vod_play_url': '$$$'.join([source['episodes'] for source in play_sources])
            }
        except Exception as e:
            self.log(f"获取详情出错: {str(e)}")
            return None
|
||
|
||
def _parse_play_sources(self, doc, vod_id):
|
||
"""解析播放源"""
|
||
try:
|
||
play_sources = []
|
||
|
||
# 查找播放链接
|
||
episode_selectors = [
|
||
'//a[contains(@href,"/v_play/")]',
|
||
'//div[contains(@class,"play")]//a'
|
||
]
|
||
|
||
episodes = []
|
||
for selector in episode_selectors:
|
||
episode_elements = doc.xpath(selector)
|
||
if episode_elements:
|
||
for ep in episode_elements:
|
||
ep_title = ep.xpath('./text()')[0] if ep.xpath('./text()') else ''
|
||
ep_url = ep.xpath('./@href')[0] if ep.xpath('./@href') else ''
|
||
|
||
if ep_title and ep_url:
|
||
# 提取播放ID
|
||
play_id = self.regStr(r'/v_play/([^.]+)\.html', ep_url)
|
||
if play_id:
|
||
episodes.append(f"{ep_title.strip()}${play_id}")
|
||
break
|
||
|
||
if episodes:
|
||
play_sources.append({
|
||
'name': '默认播放',
|
||
'episodes': '#'.join(episodes)
|
||
})
|
||
else:
|
||
# 默认播放源
|
||
play_sources.append({
|
||
'name': '默认播放',
|
||
'episodes': f'第1集$bXZfMTM0NTY4LW5tXzE='
|
||
})
|
||
|
||
return play_sources
|
||
except Exception as e:
|
||
self.log(f"解析播放源出错: {str(e)}")
|
||
return [{'name': '默认播放', 'episodes': f'第1集$bXZfMTM0NTY4LW5tXzE='}]
|