import puppeteer from 'puppeteer-extra'
import { Browser, Page } from 'puppeteer'
import { PuppeteerExtraPluginAdblocker } from 'puppeteer-extra-plugin-adblocker'
import { LooseObject } from '../../types/looseTypes'
import { StreamData, StreamerData, Socials } from '../../types/scraping/Streamer'
import { Streamlink } from '@dragongoose/streamlink'

/**
 * Scrapes channel information from twitch.tv with a headless browser and
 * asks Streamlink for live status and the available stream qualities.
 */
export class TwitchScraper {
  /**
   * Storage exposed to callers; nothing in this class reads or writes it.
   * NOTE(review): the type arguments (username -> StreamerData) are assumed — confirm against callers.
   */
  public cache: Map<string, StreamerData> = new Map()

  /** Registers the adblocker plugin (trackers and annoyances blocked) on the puppeteer-extra instance. */
  constructor() {
    puppeteer.use(new PuppeteerExtraPluginAdblocker({ blockTrackersAndAnnoyances: true }))
  }

  /**
   * Converts a count such as "1.2K", "3M" or "950" into a plain number.
   *
   * @param num - Text holding a number with an optional k/m/b suffix (case-insensitive).
   * @returns The expanded number, or `null` when the text is not a valid number.
   */
  private abbreviatedNumberToNumber = (num: string): number | null => {
    const multipliers: { [k: string]: number } = {
      'k': 1000,
      'm': 1000000,
      'b': 1000000000
    }

    const text = num.trim()
    const multiplier = multipliers[text.charAt(text.length - 1).toLowerCase()]

    if (multiplier !== undefined) {
      const numberOnly = Number(text.slice(0, -1))
      return Number.isFinite(numberOnly) ? numberOnly * multiplier : null
    }

    // No suffix: the count is already a plain number (possibly with thousands separators).
    const plain = Number(text.replace(/,/g, ''))
    return text !== '' && Number.isFinite(plain) ? plain : null
  }

  // https://advancedweb.hu/how-to-speed-up-puppeteer-scraping-with-parallelization/
  /**
   * Launches a headless browser, hands it to `fn`, and closes it afterwards
   * whether `fn` resolves or throws.
   *
   * @param fn - Work to perform with the browser.
   * @returns Whatever `fn` resolves to.
   */
  private withBrowser = async <T>(fn: (browser: Browser) => Promise<T>): Promise<T> => {
    const browser = await puppeteer.launch({
      headless: true,
      args: ['--no-sandbox']
    })
    try {
      return await fn(browser)
    } finally {
      await browser.close()
    }
  }

  /**
   * Opens a new page in `browser`, hands it to `fn`, and closes the page
   * afterwards whether `fn` resolves or throws.
   *
   * @param browser - Browser in which the page is created.
   * @returns A function that runs `fn` against a fresh page and resolves to its result.
   */
  private withPage = (browser: Browser) => async <T>(fn: (page: Page) => Promise<T>): Promise<T> => {
    const page = await browser.newPage()
    //await page.tracing.start({ path: '../profile.json', screenshots: true });
    try {
      return await fn(page)
    } finally {
      //await page.tracing.stop();
      await page.close()
    }
  }

  /**
   * Reads the tags, title, topic and start time of the stream shown on `page`.
   *
   * NOTE(review): the selectors look like generated class names and will
   * presumably break when the page markup changes — verify periodically.
   *
   * @param page - Page already navigated to the channel.
   * @param isLive - Whether the channel is currently live.
   * @returns The stream data, or `null` when the channel is offline or the
   *   uptime counter is missing or unparseable.
   */
  private getStreamData = async (page: Page, isLive: boolean): Promise<StreamData | null> => {
    if (!isLive) return null

    const streamData: LooseObject = {}

    // Get stream tags
    const tagsSelector = '.eUxEWt * span'
    const tags: string[] = await page.$$eval(tagsSelector, elements => elements.map(el => el.innerHTML))
    streamData.tags = tags

    // Get stream title
    const titleSelector = 'h2.CoreText-sc-1txzju1-0'
    const title: string = await page.$eval(titleSelector, element => element.innerText)
    streamData.title = title

    // Get topic
    const topicSelector = '.hfMGmo'
    const topic = await page.$eval(topicSelector, element => element.textContent)
    streamData.topic = topic

    // Get start time
    const liveTimeSelector = '.live-time' // formatted as HH:MM:SS
    const liveTime = await page.$eval(liveTimeSelector, element => element.textContent)
    if (!liveTime) return null

    const liveTimeSplit = liveTime.trim().split(':').map(Number)
    if (liveTimeSplit.some(part => Number.isNaN(part))) return null

    // Fold the colon-separated fields into seconds. Folding left to right also
    // handles counters with fewer than three fields (MM:SS or SS).
    const elapsedSeconds = liveTimeSplit.reduce((total, part) => total * 60 + part, 0)

    // Subtract the uptime from the current time to get the moment the stream started.
    streamData.startedAt = Date.now() - elapsedSeconds * 1000

    return streamData as StreamData
  }

  /**
   * Reads follower count, about text, social links and profile picture from
   * the channel page. When the channel is offline the "about" tab is clicked
   * first.
   *
   * @param page - Page already navigated to the channel.
   * @param isLive - Whether the channel is currently live.
   * @returns The scraped channel data (without stream, live flag or username).
   */
  private getAboutData = async (page: Page, isLive: boolean): Promise<StreamerData> => {
    const aboutData: LooseObject = {}

    if (!isLive) {
      // Get data from about page
      const aboutPageButtonSelector = 'li.InjectLayout-sc-1i43xsx-0:nth-child(2) > a:nth-child(1) > div:nth-child(1) > div:nth-child(1) > p:nth-child(1)'
      await page.click(aboutPageButtonSelector)
    }

    const followersSelector = '.kuAEke'
    await page.waitForSelector(followersSelector)
    const followers = await page.$eval(followersSelector, element => element.innerHTML)
    aboutData.followersAbbv = followers
    aboutData.followers = this.abbreviatedNumberToNumber(followers)

    const aboutSectionSelector = '.kLFSJC'
    const aboutSection = await page.$eval(aboutSectionSelector, element => element.innerHTML)
    aboutData.about = aboutSection

    const socialSelector = '.ccXeNc * a'
    // This callback is evaluated inside the page, so every helper it needs
    // must be declared within it rather than hoisted to class scope.
    const socials: Socials[] = await page.$$eval(socialSelector, elements => elements.map((el) => {
      const getHostName = (url: string) => {
        const match = url.match(/:\/\/(www[0-9]?\.)?(.[^/:]+)/i);
        if (match != null && match.length > 2 && typeof match[2] === 'string' && match[2].length > 0) {
          const hostname = match[2].split(".");
          return hostname[0];
        } else {
          return null;
        }
      }

      const validHosts = ['instagram', 'youtube', 'discord', 'tiktok','twitter']

      const socialHost = getHostName(el.href) || el.href || ''
      // Only recognised hosts get a type; every other link keeps `type: null`.
      let type: string | null = socialHost
      if(!validHosts.includes(socialHost)) type = null

      return {
        type,
        link: el.href,
        text: el.innerText
      }
    }))
    aboutData.socials = socials

    const profilePictureSelector = 'figure.ScAvatar-sc-144b42z-0:nth-child(2) > img:nth-child(1)'
    const profilePicture = await page.$eval(profilePictureSelector, element => element.getAttribute('src'))
    aboutData.pfp = profilePicture

    return aboutData as StreamerData
  }

  /**
   * Collects everything known about a channel: about data, live flag,
   * username and, when live, the stream data with its available qualities.
   *
   * @param username - Twitch channel name.
   * @returns The combined streamer data.
   * @throws Error when the channel page does not load with an OK response.
   */
  public getStreamerData = async (username: string): Promise<StreamerData> => {
    const isLive = await this.isLive(username)

    // withBrowser/withPage close the page and the browser themselves, so no
    // explicit close is needed here.
    const scraped = await this.withBrowser((browser: Browser) =>
      this.withPage(browser)(async (page: Page) => {
        const res = await page.goto(`https://twitch.tv/${username}`)
        if (!res?.ok()) return null

        return Promise.all([this.getStreamData(page, isLive), this.getAboutData(page, isLive)])
      })
    )

    if (scraped === null) {
      throw new Error(`Failed to load the Twitch page for "${username}"`)
    }

    const [stream, about] = scraped
    const recoveredData: LooseObject = { ...about, stream, isLive, username }

    // add final information
    if (recoveredData.stream && isLive) {
      recoveredData.stream.qualities = await this.getQualities(username)
    }

    return recoveredData as StreamerData
  }

  /**
   * Asks Streamlink whether the channel is currently live.
   *
   * @param username - Twitch channel name.
   */
  public isLive = async (username: string) => {
    const streamlink = new Streamlink(`https://twitch.tv/${username}`, {})
    return await streamlink.isLive()
  }

  /**
   * Asks Streamlink for the qualities available for the channel's stream.
   *
   * @param username - Twitch channel name.
   */
  public getQualities = async (username: string) => {
    const streamlink = new Streamlink(`https://twitch.tv/${username}`, {})
    return await streamlink.getQualities()
  }
}