PyramidStore/base/spider.py
JJBJJ 6bad0810f1 fix(base/spider.py): 更新
refactor(plugin/小白调试示例.py): 添加调试示例
2025-03-26 20:49:23 +08:00

151 lines
No EOL
4.4 KiB
Python

import re
import os
import json
import time
import requests
from lxml import etree
from abc import abstractmethod, ABCMeta
from importlib.machinery import SourceFileLoader
from base.localProxy import Proxy
class Spider(metaclass=ABCMeta):
_instance = None
def __init__(self):
self.extend = ''
def __new__(cls, *args, **kwargs):
if cls._instance:
return cls._instance
else:
cls._instance = super().__new__(cls)
return cls._instance
@abstractmethod
def init(self, extend=""):
pass
def homeContent(self, filter):
pass
def homeVideoContent(self):
pass
def categoryContent(self, tid, pg, filter, extend):
pass
def detailContent(self, ids):
pass
def searchContent(self, key, quick, pg="1"):
pass
def playerContent(self, flag, id, vipFlags):
pass
def liveContent(self, url):
pass
def localProxy(self, param):
pass
def isVideoFormat(self, url):
pass
def manualVideoCheck(self):
pass
def action(self, action):
pass
def destroy(self):
pass
def getName(self):
pass
def getDependence(self):
return []
def loadSpider(self, name):
return self.loadModule(name).Spider()
def loadModule(self, name):
path = os.path.join(os.path.join("../plugin"), f'{name}.py')
return SourceFileLoader(name, path).load_module()
def regStr(self, reg, src, group=1):
m = re.search(reg, src)
src = ''
if m:
src = m.group(group)
return src
def removeHtmlTags(self, src):
clean = re.compile('<.*?>')
return re.sub(clean, '', src)
def cleanText(self, src):
clean = re.sub('[\U0001F600-\U0001F64F\U0001F300-\U0001F5FF\U0001F680-\U0001F6FF\U0001F1E0-\U0001F1FF]', '',
src)
return clean
def fetch(self, url, params=None, cookies=None, headers=None, timeout=5, verify=True, stream=False,
allow_redirects=True):
rsp = requests.get(url, params=params, cookies=cookies, headers=headers, timeout=timeout, verify=verify,
stream=stream, allow_redirects=allow_redirects)
rsp.encoding = 'utf-8'
return rsp
def post(self, url, params=None, data=None, json=None, cookies=None, headers=None, timeout=5, verify=True,
stream=False, allow_redirects=True):
rsp = requests.post(url, params=params, data=data, json=json, cookies=cookies, headers=headers, timeout=timeout,
verify=verify, stream=stream, allow_redirects=allow_redirects)
rsp.encoding = 'utf-8'
return rsp
def html(self, content):
return etree.HTML(content)
def str2json(str):
return json.loads(str)
def json2str(str):
return json.dumps(str, ensure_ascii=False)
def getProxyUrl(self, local=True):
return f'{Proxy.getUrl(local)}?do=py'
def log(self, msg):
if isinstance(msg, dict) or isinstance(msg, list):
print(json.dumps(msg, ensure_ascii=False))
else:
print(f'{msg}')
def getCache(self, key):
value = self.fetch(f'http://127.0.0.1:{Proxy.getPort()}/cache?do=get&key={key}', timeout=5).text
if len(value) > 0:
if value.startswith('{') and value.endswith('}') or value.startswith('[') and value.endswith(']'):
value = json.loads(value)
if type(value) == dict:
if not 'expiresAt' in value or value['expiresAt'] >= int(time.time()):
return value
else:
self.delCache(key)
return None
return value
else:
return None
def setCache(self, key, value):
if type(value) in [int, float]:
value = str(value)
if len(value) > 0:
if type(value) == dict or type(value) == list:
value = json.dumps(value, ensure_ascii=False)
r = self.post(f'http://127.0.0.1:{Proxy.getPort()}/cache?do=set&key={key}', data={"value": value}, timeout=5)
return 'succeed' if r.status_code == 200 else 'failed'
def delCache(self, key):
r = self.fetch(f'http://127.0.0.1:{Proxy.getPort()}/cache?do=del&key={key}', timeout=5)
return 'succeed' if r.status_code == 200 else 'failed'