mirror of
https://github.com/yuanwangokk-1/TV-BOX.git
synced 2025-10-24 23:11:21 +00:00
328 lines
No EOL
11 KiB
Python
328 lines
No EOL
11 KiB
Python
# coding=utf-8
|
|
#!/usr/bin/python
|
|
import sys
|
|
sys.path.append('..')
|
|
from base.spider import Spider
|
|
import json
|
|
import time
|
|
import urllib.parse
|
|
import re
|
|
import requests
|
|
from lxml import etree
|
|
|
|
class Spider(Spider):
|
|
def getName(self):
|
|
return "四虎视频"
|
|
|
|
def init(self, extend=""):
|
|
self.baseUrl = "https://www.sihuhu.xyz"
|
|
self.headers = {
|
|
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36",
|
|
"Referer": self.baseUrl
|
|
}
|
|
|
|
def homeContent(self, filter):
|
|
result = {}
|
|
# 从HTML中提取分类信息
|
|
cateManual = {
|
|
"传媒厂商": "20",
|
|
"麻豆传媒": "21",
|
|
"91制片": "22",
|
|
"蜜桃传媒": "23",
|
|
"天美传媒": "24",
|
|
"精东影片": "25",
|
|
"星空传媒": "26",
|
|
"葫芦影业": "27",
|
|
"糖心VLOG": "28",
|
|
"精品推荐": "29",
|
|
"日本无码": "30",
|
|
"日本有码": "31",
|
|
"AV解说": "32",
|
|
"中文有码": "33",
|
|
"中文无码": "34",
|
|
"日韩极品": "35",
|
|
"日韩无码": "36",
|
|
"少女萝莉": "37",
|
|
"水嫩萝莉": "38",
|
|
"极品主播": "40",
|
|
"卡通动漫": "43",
|
|
"SM调教": "44",
|
|
"探花合集": "50",
|
|
"91大神": "51",
|
|
"台湾萝莉": "54",
|
|
"萝莉传媒": "55",
|
|
"白虎口爆": "57",
|
|
"嫩女网爆": "47",
|
|
"嫩逼乌鸡": "42",
|
|
"少女伦理": "45",
|
|
"萝莉互口": "46",
|
|
"黑料网爆": "48",
|
|
"野战车震": "52",
|
|
"萝莉黑瓜": "53",
|
|
"萝莉巨乳": "58",
|
|
"明星换脸": "73",
|
|
"萝莉抠逼": "56",
|
|
"国产大作": "39",
|
|
"欧美萝莉": "41",
|
|
"热门事件": "49",
|
|
"少女3P": "59",
|
|
"偷拍萝莉": "60",
|
|
"强奸少女": "61",
|
|
"重口猎奇": "62",
|
|
"制服萝控": "63",
|
|
"极品少女": "64",
|
|
"明星爆料": "65",
|
|
"X短视频": "66",
|
|
"AV明星": "67",
|
|
"极品萝莉": "68",
|
|
"人妻艹妈": "69",
|
|
"VR视角": "70",
|
|
"角色扮演": "71",
|
|
"男同男娘": "72"
|
|
}
|
|
classes = []
|
|
for k in cateManual:
|
|
classes.append({
|
|
'type_name': k,
|
|
'type_id': cateManual[k]
|
|
})
|
|
result['class'] = classes
|
|
return result
|
|
|
|
def homeVideoContent(self):
|
|
result = {}
|
|
# 尝试获取首页推荐视频
|
|
try:
|
|
rsp = self.fetch(self.baseUrl, headers=self.headers)
|
|
html = etree.HTML(rsp.text)
|
|
|
|
videos = []
|
|
# 尝试解析首页视频列表
|
|
video_elements = html.xpath('//ul[@class="thumbnail-group clearfix"]/li')
|
|
for element in video_elements:
|
|
try:
|
|
name = element.xpath('.//h5/a/text()')[0].strip()
|
|
pic = element.xpath('.//img/@data-original')[0]
|
|
if not pic.startswith('http'):
|
|
pic = self.baseUrl + pic
|
|
href = element.xpath('.//a[@class="thumbnail"]/@href')[0]
|
|
vid = href.split('/')[-1].replace('.html', '')
|
|
remark = element.xpath('.//span[@class="title"]/text()')
|
|
remark = remark[0] if remark else ""
|
|
|
|
videos.append({
|
|
"vod_id": vid,
|
|
"vod_name": name,
|
|
"vod_pic": pic,
|
|
"vod_remarks": remark
|
|
})
|
|
except:
|
|
continue
|
|
|
|
result['list'] = videos
|
|
except:
|
|
result['list'] = []
|
|
|
|
return result
|
|
|
|
def categoryContent(self, tid, pg, filter, extend):
|
|
result = {}
|
|
url = f'{self.baseUrl}/vod/type/id/{tid}/page/{pg}.html'
|
|
rsp = self.fetch(url, headers=self.headers)
|
|
html = etree.HTML(rsp.text)
|
|
|
|
vodList = []
|
|
video_elements = html.xpath('//ul[@class="thumbnail-group clearfix"]/li')
|
|
for element in video_elements:
|
|
try:
|
|
name = element.xpath('.//h5/a/text()')[0].strip()
|
|
pic = element.xpath('.//img/@data-original')[0]
|
|
if not pic.startswith('http'):
|
|
pic = self.baseUrl + pic
|
|
href = element.xpath('.//a[@class="thumbnail"]/@href')[0]
|
|
vid = href.split('/')[-1].replace('.html', '')
|
|
remark = element.xpath('.//span[@class="title"]/text()')
|
|
remark = remark[0] if remark else ""
|
|
|
|
vodList.append({
|
|
"vod_id": vid,
|
|
"vod_name": name,
|
|
"vod_pic": pic,
|
|
"vod_remarks": remark
|
|
})
|
|
except:
|
|
continue
|
|
|
|
result['list'] = vodList
|
|
result['page'] = pg
|
|
result['pagecount'] = 9999
|
|
result['limit'] = 20
|
|
result['total'] = 999999
|
|
return result
|
|
|
|
def detailContent(self, array):
|
|
tid = array[0]
|
|
url = f'{self.baseUrl}/vod/detail/id/{tid}.html'
|
|
rsp = self.fetch(url, headers=self.headers)
|
|
html = etree.HTML(rsp.text)
|
|
|
|
# 获取视频详情
|
|
title = html.xpath('//title/text()')[0].replace(' - 四虎视频', '')
|
|
pic = html.xpath('//meta[@property="og:image"]/@content')
|
|
pic = pic[0] if pic else ""
|
|
desc = html.xpath('//meta[@name="description"]/@content')
|
|
desc = desc[0] if desc else ""
|
|
|
|
# 获取播放列表 - 修复播放地址获取
|
|
play_from = []
|
|
play_url = []
|
|
|
|
# 尝试获取所有播放源
|
|
play_sources = html.xpath('//div[@class="module-play-list"]/div')
|
|
for source in play_sources:
|
|
source_name = source.xpath('.//span/text()')
|
|
if source_name:
|
|
source_name = source_name[0].strip()
|
|
play_from.append(source_name)
|
|
|
|
# 获取该源下的所有剧集
|
|
episodes = source.xpath('.//a')
|
|
episode_urls = []
|
|
for episode in episodes:
|
|
ep_name = episode.xpath('./text()')[0].strip()
|
|
ep_href = episode.xpath('./@href')[0]
|
|
episode_urls.append(f"{ep_name}${self.baseUrl}{ep_href}")
|
|
|
|
play_url.append("#".join(episode_urls))
|
|
|
|
# 如果没有找到播放源,使用默认方式
|
|
if not play_from:
|
|
play_from = ["默认"]
|
|
play_page_url = f"{self.baseUrl}/vod/play/id/{tid}/sid/1/nid/1.html"
|
|
play_url.append(f"第1集${play_page_url}")
|
|
|
|
vod = {
|
|
"vod_id": tid,
|
|
"vod_name": title,
|
|
"vod_pic": pic,
|
|
"vod_content": desc,
|
|
"vod_play_from": "$$$".join(play_from),
|
|
"vod_play_url": "$$$".join(play_url)
|
|
}
|
|
|
|
return {'list': [vod]}
|
|
|
|
def searchContent(self, key, quick):
|
|
result = {}
|
|
url = f'{self.baseUrl}/vod/search/page/1/wd/{urllib.parse.quote(key)}.html'
|
|
rsp = self.fetch(url, headers=self.headers)
|
|
html = etree.HTML(rsp.text)
|
|
|
|
vodList = []
|
|
video_elements = html.xpath('//ul[@class="thumbnail-group clearfix"]/li')
|
|
for element in video_elements:
|
|
try:
|
|
name = element.xpath('.//h5/a/text()')[0].strip()
|
|
pic = element.xpath('.//img/@data-original')[0]
|
|
if not pic.startswith('http'):
|
|
pic = self.baseUrl + pic
|
|
href = element.xpath('.//a[@class="thumbnail"]/@href')[0]
|
|
vid = href.split('/')[-1].replace('.html', '')
|
|
remark = element.xpath('.//span[@class="title"]/text()')
|
|
remark = remark[0] if remark else ""
|
|
|
|
vodList.append({
|
|
"vod_id": vid,
|
|
"vod_name": name,
|
|
"vod_pic": pic,
|
|
"vod_remarks": remark
|
|
})
|
|
except:
|
|
continue
|
|
|
|
result['list'] = vodList
|
|
return result
|
|
|
|
def playerContent(self, flag, id, vipFlags):
|
|
result = {}
|
|
# 解析播放地址
|
|
url = f'{self.baseUrl}{id}' if id.startswith('/') else id
|
|
|
|
# 检查是否已经是m3u8链接
|
|
if id.endswith('.m3u8'):
|
|
result["parse"] = 0
|
|
result["playUrl"] = ""
|
|
result["url"] = id
|
|
result["header"] = json.dumps(self.headers)
|
|
return result
|
|
|
|
rsp = self.fetch(url, headers=self.headers)
|
|
|
|
# 方法1: 尝试从JavaScript变量中提取播放信息
|
|
pattern = r'var player_aaaa\s*=\s*({.*?});'
|
|
match = re.search(pattern, rsp.text, re.DOTALL)
|
|
|
|
if match:
|
|
try:
|
|
player_info = json.loads(match.group(1))
|
|
video_url = player_info.get('url', '')
|
|
|
|
if video_url:
|
|
# 处理转义字符
|
|
video_url = video_url.replace('\\/', '/')
|
|
result["parse"] = 0
|
|
result["playUrl"] = ""
|
|
result["url"] = video_url
|
|
result["header"] = json.dumps(self.headers)
|
|
return result
|
|
except:
|
|
pass
|
|
|
|
# 方法2: 尝试从JavaScript中找到url字段
|
|
url_patterns = [
|
|
r'"url"\s*:\s*"([^"]+)"',
|
|
r"url\s*:\s*'([^']+)'",
|
|
r'video_url\s*:\s*"([^"]+)"',
|
|
r"video_url\s*:\s*'([^']+)'"
|
|
]
|
|
|
|
for pattern in url_patterns:
|
|
match = re.search(pattern, rsp.text)
|
|
if match:
|
|
video_url = match.group(1).replace('\\/', '/')
|
|
if video_url and ('m3u8' in video_url or 'mp4' in video_url):
|
|
result["parse"] = 0
|
|
result["playUrl"] = ""
|
|
result["url"] = video_url
|
|
result["header"] = json.dumps(self.headers)
|
|
return result
|
|
|
|
# 方法3: 尝试从iframe中提取视频地址
|
|
iframe_pattern = r'<iframe[^>]+src="([^"]+)"'
|
|
iframe_match = re.search(iframe_pattern, rsp.text)
|
|
if iframe_match:
|
|
iframe_src = iframe_match.group(1)
|
|
if iframe_src.startswith('//'):
|
|
iframe_src = 'https:' + iframe_src
|
|
elif iframe_src.startswith('/'):
|
|
iframe_src = self.baseUrl + iframe_src
|
|
|
|
# 递归获取iframe内容
|
|
return self.playerContent(flag, iframe_src, vipFlags)
|
|
|
|
# 如果以上方法都失败,返回原始页面供进一步解析
|
|
result["parse"] = 1
|
|
result["playUrl"] = ""
|
|
result["url"] = url
|
|
result["header"] = json.dumps(self.headers)
|
|
|
|
return result
|
|
|
|
def isVideoFormat(self, url):
|
|
pass
|
|
|
|
def manualVideoCheck(self):
|
|
pass
|
|
|
|
def localProxy(self, param):
|
|
return [] |