buddylive/py/videoevents.py
2025-10-13 00:51:02 -04:00

240 lines
8.6 KiB
Python

import json
import random
import re
import sys
import time
import urllib.parse
from datetime import datetime

import pytz
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium_stealth import stealth
from webdriver_manager.chrome import ChromeDriverManager
from webdriver_manager.core.os_manager import ChromeType
# Function to convert UTC/EDT time to Eastern Time Zone (EST)
def utc_to_est(time_str):
    """Convert a UTC or EDT timestamp string to US/Eastern time.

    Supported inputs:
      * ISO 8601 with a trailing ``Z``, with or without fractional seconds,
        e.g. ``2024-06-01T23:00:00Z`` or ``2024-06-01T23:00:00.000Z``
      * ``MM/DD/YY HH:MM:SS AM|PM UTC``
      * ``MM/DD/YY HH:MM:SS AM|PM EDT``

    Args:
        time_str: Timestamp text in one of the formats above.

    Returns:
        The time in the ``US/Eastern`` zone, formatted as
        ``MM/DD/YY HH:MM:SS AM|PM EST``. The ``EST`` suffix is a fixed
        label in the format string; it is emitted even when the converted
        time falls in daylight saving time.

    Raises:
        ValueError: If ``time_str`` carries none of the ``Z`` / ``UTC`` /
            ``EDT`` markers, or does not parse with the matching format.
    """
    if 'Z' in time_str:
        # %f accepts one to six fractional digits, so any millisecond value
        # (not only the literal ".000") is parsed.
        if '.' in time_str:
            time_format = '%Y-%m-%dT%H:%M:%S.%fZ'
        else:
            time_format = '%Y-%m-%dT%H:%M:%SZ'
        source_is_utc = True
    elif 'UTC' in time_str:
        time_format = '%m/%d/%y %I:%M:%S %p UTC'
        source_is_utc = True
    elif 'EDT' in time_str:
        time_format = '%m/%d/%y %I:%M:%S %p EDT'
        source_is_utc = False
    else:
        raise ValueError("Unsupported timezone or format in time string")

    # Named `parsed` rather than `time` so the `time` module is not shadowed.
    parsed = datetime.strptime(time_str, time_format)

    eastern = pytz.timezone('US/Eastern')
    if source_is_utc:
        aware = pytz.utc.localize(parsed)
    else:
        # pytz zones must be attached with localize(); passing one through
        # replace(tzinfo=...) selects the zone's local-mean-time offset
        # (-4:56 for US/Eastern) and skews the result by almost an hour.
        # is_dst=True resolves the ambiguous fall-back hour to daylight
        # time, which is what an "EDT" label states.
        aware = eastern.localize(parsed, is_dst=True)

    return aware.astimezone(eastern).strftime('%m/%d/%y %I:%M:%S %p EST')
# Timestamp shapes recognised inside an event title, tried in this order.
# Compiled once at import time instead of on every call.
_DATETIME_PATTERNS = tuple(
    re.compile(pattern)
    for pattern in (
        r'\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z',  # ISO format with milliseconds
        r'\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z',  # ISO format without milliseconds
        # Custom 12-hour format; EDT is accepted alongside UTC because
        # utc_to_est() can convert both.
        r'\d{1,2}/\d{1,2}/\d{2} \d{1,2}:\d{2}:\d{2} (?:AM|PM) (?:UTC|EDT)',
    )
)


# Function to extract date and time from a string
def extract_datetime(input_str):
    """Return the first recognised timestamp found in ``input_str``.

    The patterns are tried in order (ISO with milliseconds, ISO without
    milliseconds, then ``M/D/YY H:MM:SS AM|PM UTC|EDT``); the text matched
    by the first pattern that hits is returned.

    Args:
        input_str: Text that may contain a timestamp.

    Returns:
        The matched timestamp substring, or ``input_str`` unchanged when no
        pattern matches. No exception is raised for a miss; the caller
        decides what to do with unrecognised text.
    """
    for pattern in _DATETIME_PATTERNS:
        match = pattern.search(input_str)
        if match:
            return match.group(0)
    # Nothing recognised: hand the text back untouched.
    return input_str
# Pool of browser User-Agent strings; one is chosen at random for each run.
user_agents = [
    #add your list of user agents here
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36',
    'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36',
    'Mozilla/5.0 (iPhone; CPU iPhone OS 17_5 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) CriOS/126.0.6478.35 Mobile/15E148 Safari/604.1',
    'Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.6422.165 Mobile Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:127.0) Gecko/20100101 Firefox/127.0',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 14.5; rv:127.0) Gecko/20100101 Firefox/127.0',
    'Mozilla/5.0 (X11; Linux i686; rv:127.0) Gecko/20100101 Firefox/127.0',
    'Mozilla/5.0 (iPhone; CPU iPhone OS 14_5 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) FxiOS/127.0 Mobile/15E148 Safari/605.1.15',
    'Mozilla/5.0 (Android 14; Mobile; rv:127.0) Gecko/126.0 Firefox/126.0',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:115.0) Gecko/20100101 Firefox/115.0',
]

# webdriver_manager installs the driver for Chromium; the result feeds the Service.
chrome_service = Service(
    ChromeDriverManager(chrome_type=ChromeType.CHROMIUM).install()
)

# User agent advertised by this run's browser session.
user_agent = random.choice(user_agents)

# Command-line switches for the browser, added in this order.
chrome_options = webdriver.ChromeOptions()
for _chrome_flag in (
    "--headless",
    "--disable-gpu",
    "--no-sandbox",
    "--disable-dev-shm-usage",
    "start-maximized",
    "disable-infobars",
    "--disable-extensions",
    "--crash-dumps-dir=/tmp",
    f"user-agent={user_agent}",
):
    chrome_options.add_argument(_chrome_flag)

# Start Chrome with the service and options assembled above.
driver = webdriver.Chrome(service=chrome_service, options=chrome_options)

# Fingerprint values passed to selenium_stealth for this session.
_stealth_settings = {
    "languages": ["en-US", "en"],
    "vendor": "Google Inc.",
    "platform": "Win32",
    "webgl_vendor": "Intel Inc.",
    "renderer": "Intel Iris OpenGL Engine",
    "fix_hairline": True,
}
stealth(driver, **_stealth_settings)
# Placeholder stream written for an event when no .m3u8 request was captured.
OFFLINE_URL = "https://github.com/mikekaprielian/rtnaodhor93n398/raw/main/en/offline.mp4"


def _unwrap_m3u8_url(request_url):
    """Return the stream URL carried by a captured request, or None to drop it.

    Requests to ``ping.gif`` that carry a ``mu=`` query parameter hold the
    stream URL inside that parameter; any other request URL is returned
    as-is.
    """
    if "ping.gif" in request_url and "mu=" in request_url:
        query_params = urllib.parse.parse_qs(urllib.parse.urlparse(request_url).query)
        if "mu" not in query_params:
            return None
        # NOTE(review): parse_qs has already percent-decoded the value once;
        # the extra unquote only matters for double-encoded values - confirm.
        return urllib.parse.unquote(query_params["mu"][0])
    return request_url


def _capture_stream_url(browser, page_url):
    """Load ``page_url`` and return the first .m3u8 URL it requests.

    Falls back to ``OFFLINE_URL`` when the page cannot be loaded, when the
    performance entries cannot be read, or when no .m3u8 request is seen.
    Failures are reported on stderr so stdout stays a valid playlist.
    """
    try:
        browser.get(page_url)
        # Wait for a brief period to allow the page to load and network
        # requests to be made.
        time.sleep(10)
        entries = json.loads(
            browser.execute_script("return JSON.stringify(performance.getEntries());")
        )
        for entry in entries:
            if ".m3u8" not in entry["name"]:
                continue
            stream_url = _unwrap_m3u8_url(entry["name"])
            if stream_url is not None:
                return stream_url
    except Exception as exc:
        # Best effort: one broken event page must not abort the whole playlist.
        print(f"Could not capture stream for {page_url}: {exc}", file=sys.stderr)
    return OFFLINE_URL


def _split_event_name(name):
    """Split a link caption into ``(title, rest_of_title)``.

    Commas are removed and ``': '`` is normalised to ``' - '``; the text
    before the first ``' - '`` is the title and everything after it is the
    remainder (an empty string when there is no separator).
    """
    name_fixed = name.replace(',', '').replace(': ', ' - ')
    title, _, rest_of_title = name_fixed.partition(' - ')
    return title, rest_of_title


# All browser work runs inside try/finally so the Chrome process is shut
# down even when scraping fails part-way through.
try:
    # Open the webpage
    url = "https://thetvapp.to/"
    driver.get(url)

    # Wait for the page to load
    wait = WebDriverWait(driver, 10)
    wait.until(EC.presence_of_element_located((By.CLASS_NAME, "row")))

    # Find all the rows containing the desired links
    rows = driver.find_elements(By.CLASS_NAME, "row")

    # (group name, channel name, link URL) for every event link found
    all_links = []
    for row in rows:
        # find_elements returns an empty list instead of raising, so a row
        # without an <h3> heading is skipped rather than crashing the run.
        headings = row.find_elements(By.TAG_NAME, "h3")
        if not headings:
            continue
        # Group name (e.g., MLB)
        group_name = headings[0].text
        if group_name == "Live TV Channels":
            continue
        for link in row.find_elements(By.TAG_NAME, "a"):
            channel_name = link.text.strip()
            link_url = link.get_attribute("href")
            all_links.append((group_name, channel_name, link_url))

    # Print the M3U header
    print("#EXTM3U")

    # Visit every event page and emit one playlist entry per event.
    for group, name, link in all_links:
        m3u8_url = _capture_stream_url(driver, link)
        title, rest_of_title = _split_event_name(name)
        try:
            # Extract the date and time portion and convert it to Eastern time
            est_time_str = utc_to_est(extract_datetime(rest_of_title))
        except ValueError as e:
            # Diagnostics go to stderr; on stdout they would be read as a
            # playlist entry.
            print(f"Error converting time: {e}", file=sys.stderr)
            est_time_str = rest_of_title  # Fall back to displaying the original text
        # Print the channel information in the M3U format
        print(f"#EXTINF:-1 group-title=\"{group}\",{est_time_str} = {title}")
        print(m3u8_url)  # Print only the first m3u8 URL
finally:
    # Close the WebDriver
    driver.quit()