Clean code
This commit is contained in:
parent
60bae4f617
commit
088df551b0
@ -1,7 +1,7 @@
|
|||||||
<script lang="ts" >
|
<script lang="ts" >
|
||||||
import { ref, onMounted } from 'vue'
|
import { ref, onMounted } from 'vue'
|
||||||
import { useRoute } from "vue-router";
|
import { useRoute } from "vue-router";
|
||||||
import type { StreamerData } from '../../../server/routes/profile/profileRoute'
|
import type { StreamerData } from '../../../server/types/scraping/Streamer'
|
||||||
|
|
||||||
import { VideoPlayer } from '@videojs-player/vue'
|
import { VideoPlayer } from '@videojs-player/vue'
|
||||||
import 'video.js/dist/video-js.css'
|
import 'video.js/dist/video-js.css'
|
||||||
|
@ -1,219 +1,16 @@
|
|||||||
import { Router } from 'express'
|
import { Router } from 'express'
|
||||||
import puppeteer from 'puppeteer-extra'
|
import { TwitchScraper } from '../../util/scraping/extractors'
|
||||||
import { Browser, Page } from 'puppeteer'
|
|
||||||
import { PuppeteerExtraPluginAdblocker } from 'puppeteer-extra-plugin-adblocker'
|
|
||||||
import { LooseObject } from '../../types/looseTypes'
|
|
||||||
import { Streamlink } from '@dragongoose/streamlink'
|
|
||||||
|
|
||||||
// Register the adblocker plugin, with tracker/annoyance blocking enabled,
// on the shared puppeteer-extra instance at module load.
const adblockerPlugin = new PuppeteerExtraPluginAdblocker({
    blockTrackersAndAnnoyances: true,
})
puppeteer.use(adblockerPlugin)
|
|
||||||
|
|
||||||
const profileRouter = Router()
|
const profileRouter = Router()
|
||||||
|
const scraper = new TwitchScraper()
|
||||||
/** One link scraped from a streamer's social links panel. */
export interface Socials {
    /** Recognised platform name (e.g. 'youtube'), or null when the host is not a known platform. */
    type: string | null
    /** Visible text of the link. */
    text: string
    /** The anchor's href. */
    link: string
}

/** Details of a stream that is currently live. */
export interface StreamData {
    /** Tag labels scraped from the stream page. */
    tags: string[]
    /** Stream title. */
    title: string
    /** Topic shown for the stream. */
    topic: string
    /** Stream start time as epoch milliseconds, derived from the displayed uptime. */
    startedAt: number
    /** Quality names reported by streamlink. */
    qualities: string[]
}

/** Everything the profile endpoint returns for a streamer. */
export interface StreamerData {
    username: string
    /** Follower count as a number (parsed from `followersAbbv`). */
    followers: number
    /** Follower count exactly as displayed on the page, e.g. "1.2K". */
    followersAbbv: string
    isLive: boolean
    /** Inner HTML of the about section. */
    about: string
    /** Social links; the scraper produces `Socials` objects, not plain strings. */
    socials?: Socials[]
    /** Profile picture URL. */
    pfp: string
    /** Present only while the streamer is live. */
    stream?: StreamData
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
 * Converts a displayed count such as "1.2K", "3M", "1,234" or "950" into a number.
 *
 * A trailing k/m/b (case-insensitive) multiplies the value by a thousand,
 * a million or a billion; the product is rounded so that "1.1K" yields 1100
 * rather than a floating-point artefact. Text without a suffix is parsed as a
 * plain number.
 *
 * @param num the count text to convert
 * @returns the numeric value, or null when the text is not a number
 */
const abbreviatedNumberToNumber = (num: string): number | null => {
    const multipliers: { [suffix: string]: number } = {
        k: 1000,
        m: 1000000,
        b: 1000000000,
    }

    // Drop surrounding whitespace and thousands separators before parsing.
    const cleaned = num.trim().replace(/,/g, '')
    if (cleaned === '') return null

    const suffix = cleaned.charAt(cleaned.length - 1).toLowerCase()
    const multiplier = multipliers[suffix]
    const digits = multiplier === undefined ? cleaned : cleaned.slice(0, -1)

    // Number('') is 0, so a bare suffix such as "K" must be rejected explicitly.
    if (digits.trim() === '') return null

    const value = Number(digits)
    if (Number.isNaN(value)) return null

    return multiplier === undefined ? value : Math.round(value * multiplier)
}
|
|
||||||
|
|
||||||
// See https://advancedweb.hu/how-to-speed-up-puppeteer-scraping-with-parallelization/
/**
 * Launches a headless browser with `--no-sandbox`, passes it to `fn`, and
 * resolves with whatever `fn` returns. The browser is closed afterwards
 * whether `fn` succeeds or throws.
 */
const withBrowser = async (fn: Function) => {
    const launchOptions = { headless: true, args: ['--no-sandbox'] }
    const browser = await puppeteer.launch(launchOptions)

    let outcome
    try {
        outcome = await fn(browser)
    } finally {
        await browser.close()
    }
    return outcome
}
|
|
||||||
|
|
||||||
/**
 * Returns a runner bound to `browser`: the runner opens a new page, passes it
 * to `fn`, and resolves with whatever `fn` returns. The page is closed
 * afterwards whether `fn` succeeds or throws.
 */
const withPage = (browser: Browser) => async (fn: Function) => {
    const page = await browser.newPage()
    // Tracing hook kept for debugging:
    // await page.tracing.start({ path: '../profile.json', screenshots: true });

    let outcome
    try {
        outcome = await fn(page)
    } finally {
        // await page.tracing.stop();
        await page.close()
    }
    return outcome
}
|
|
||||||
|
|
||||||
let isLive: boolean
|
|
||||||
|
|
||||||
/**
 * Scrapes tags, title, topic and start time of the live stream shown on `page`.
 *
 * Reads the module-level `isLive` flag and skips scraping when it is false.
 * `qualities` is not filled in here; the caller adds it afterwards.
 *
 * @param page a page already navigated to the streamer's channel
 * @returns the stream details, or null when the channel is offline or the
 *          uptime text is missing or unparseable
 */
const getStreamData = async (page: Page): Promise<StreamData | null> => {
    if (!isLive) return null

    const tags: string[] = await page.$$eval('.eUxEWt * span', elements => elements.map(el => el.innerHTML))
    const title: string = await page.$eval('h2.CoreText-sc-1txzju1-0', element => element.innerText)
    const topic = await page.$eval('.hfMGmo', element => element.textContent)

    // Uptime is displayed as H:MM:SS; shorter forms (MM:SS, SS) are accepted too.
    const liveTime = await page.$eval('.live-time', element => element.textContent)
    if (!liveTime || liveTime.trim() === '') return null

    const parts = liveTime.trim().split(':').map(Number)
    if (parts.length > 3 || parts.some(part => !Number.isFinite(part) || part < 0)) return null

    // Fold right-aligned fields into seconds, then subtract the elapsed time
    // from now to get the moment the stream started.
    const elapsedSeconds = parts.reduce((total, part) => total * 60 + part, 0)

    const streamData: LooseObject = {
        tags,
        title,
        topic,
        startedAt: Date.now() - elapsedSeconds * 1000,
    }
    return streamData as StreamData
}
|
|
||||||
|
|
||||||
/**
 * Scrapes follower count, about text, social links and profile picture from
 * the channel shown on `page`.
 *
 * Reads the module-level `isLive` flag: when it is false the about tab button
 * is clicked before scraping.
 *
 * @param page a page already navigated to the streamer's channel
 * @returns the scraped fields cast to StreamerData; `username`, `isLive` and
 *          `stream` are not set here
 */
const getAboutData = async (page: Page) => {
    const aboutData: LooseObject = {}

    if (!isLive) {
        // Get data from the about page.
        const aboutPageButtonSelector = 'li.InjectLayout-sc-1i43xsx-0:nth-child(2) > a:nth-child(1) > div:nth-child(1) > div:nth-child(1) > p:nth-child(1)'
        await page.click(aboutPageButtonSelector)
    }
    await page.waitForSelector('.ccXeNc')

    const followers = await page.$eval('.kuAEke', element => element.innerHTML)
    aboutData.followersAbbv = followers
    aboutData.followers = abbreviatedNumberToNumber(followers)

    aboutData.about = await page.$eval('.kLFSJC', element => element.innerHTML)

    // The callback runs inside the page, so it must not reference anything
    // from this module's scope.
    const socials: Socials[] = await page.$$eval('.ccXeNc * a', elements => elements.map((el) => {
        const validHosts = ['instagram', 'youtube', 'discord', 'tiktok', 'twitter']

        // Check every hostname label so subdomains such as m.youtube.com or
        // vm.tiktok.com are still recognised.
        let labels: string[] = []
        try {
            labels = new URL(el.href).hostname.toLowerCase().split('.')
        } catch (err) {
            labels = []
        }
        const type: string | null = labels.find(label => validHosts.includes(label)) || null

        return {
            type,
            link: el.href,
            text: el.innerText
        }
    }))
    aboutData.socials = socials

    const profilePictureSelector = 'figure.ScAvatar-sc-144b42z-0:nth-child(2) > img:nth-child(1)'
    const profilePicture = await page.$eval(profilePictureSelector, element => element.getAttribute('src'))
    aboutData.pfp = profilePicture

    return aboutData as StreamerData
}
|
|
||||||
|
|
||||||
/**
 * Opens the streamer's Twitch page in a fresh browser and scrapes stream and
 * about data from it.
 *
 * @param username Twitch login name; URL-encoded before being placed in the URL
 * @returns the scraped profile with `username` set; `isLive` is set to true
 *          here only when stream data was found
 */
const getStreamerData = async (username: string): Promise<StreamerData> => {
    const channelUrl = `https://twitch.tv/${encodeURIComponent(username)}`

    // withBrowser closes the browser in its own finally block, so it is not
    // closed again here.
    const [stream, about] = await withBrowser((browser: Browser) =>
        withPage(browser)(async (page: Page) => {
            await page.goto(channelUrl)
            return Promise.all([getStreamData(page), getAboutData(page)])
        })
    )

    const recoveredData: LooseObject = about
    recoveredData.stream = stream
    // `!= null` so that an undefined stream is not mistaken for a live one.
    if (stream != null) recoveredData.isLive = true
    recoveredData.username = username

    return recoveredData as StreamerData
}
|
|
||||||
|
|
||||||
/**
 * GET /users/:username — responds with the scraped profile of the streamer.
 *
 * Scraping failures are forwarded to the Express error middleware and the
 * handler stops there, so a response is never sent twice.
 */
profileRouter.get('/users/:username', async (req, res, next) => {
    const username = req.params.username

    try {
        const streamerData = await scraper.getStreamerData(username)
        res.send(streamerData)
    } catch (err) {
        next(err)
    }
})
|
||||||
|
|
||||||
export default profileRouter
|
export default profileRouter
|
24
server/types/scraping/Streamer.ts
Normal file
24
server/types/scraping/Streamer.ts
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
/** One link scraped from a streamer's social links panel. */
export interface Socials {
    /** Recognised platform name (e.g. 'youtube'), or null when the host is not a known platform. */
    type: string | null
    /** Visible text of the link. */
    text: string
    /** The anchor's href. */
    link: string
}

/** Details of a stream that is currently live. */
export interface StreamData {
    /** Tag labels scraped from the stream page. */
    tags: string[]
    /** Stream title. */
    title: string
    /** Topic shown for the stream. */
    topic: string
    /** Stream start time as epoch milliseconds, derived from the displayed uptime. */
    startedAt: number
    /** Quality names reported by streamlink. */
    qualities: string[]
}

/** Everything the profile endpoint returns for a streamer. */
export interface StreamerData {
    username: string
    /** Follower count as a number (parsed from `followersAbbv`). */
    followers: number
    /** Follower count exactly as displayed on the page, e.g. "1.2K". */
    followersAbbv: string
    isLive: boolean
    /** Inner HTML of the about section. */
    about: string
    /** Social links; the scraper produces `Socials` objects, not plain strings. */
    socials?: Socials[]
    /** Profile picture URL. */
    pfp: string
    /** Present only while the streamer is live. */
    stream?: StreamData
}
|
203
server/util/scraping/extractors.ts
Normal file
203
server/util/scraping/extractors.ts
Normal file
@ -0,0 +1,203 @@
|
|||||||
|
import puppeteer from 'puppeteer-extra'
|
||||||
|
import { Browser, Page } from 'puppeteer'
|
||||||
|
import { PuppeteerExtraPluginAdblocker } from 'puppeteer-extra-plugin-adblocker'
|
||||||
|
import { LooseObject } from '../../types/looseTypes'
|
||||||
|
import { StreamData, StreamerData, Socials } from '../../types/scraping/Streamer'
|
||||||
|
import { Streamlink } from '@dragongoose/streamlink'
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Converts a displayed count such as "1.2K", "3M", "1,234" or "950" into a number.
 *
 * A trailing k/m/b (case-insensitive) multiplies the value by a thousand,
 * a million or a billion; the product is rounded so that "1.1K" yields 1100
 * rather than a floating-point artefact. Text without a suffix is parsed as a
 * plain number.
 *
 * @returns the numeric value, or null when the text is not a number
 */
const parseAbbreviatedNumber = (num: string): number | null => {
    const multipliers: { [suffix: string]: number } = {
        k: 1000,
        m: 1000000,
        b: 1000000000,
    }

    // Drop surrounding whitespace and thousands separators before parsing.
    const cleaned = num.trim().replace(/,/g, '')
    if (cleaned === '') return null

    const suffix = cleaned.charAt(cleaned.length - 1).toLowerCase()
    const multiplier = multipliers[suffix]
    const digits = multiplier === undefined ? cleaned : cleaned.slice(0, -1)

    // Number('') is 0, so a bare suffix such as "K" must be rejected explicitly.
    if (digits.trim() === '') return null

    const value = Number(digits)
    if (Number.isNaN(value)) return null

    return multiplier === undefined ? value : Math.round(value * multiplier)
}

/**
 * Converts an uptime string ("H:MM:SS", "MM:SS" or "SS") into the epoch
 * millisecond timestamp at which the stream started.
 *
 * @param uptime elapsed time text as displayed on the page
 * @param now reference time in epoch milliseconds (defaults to the current time)
 * @returns `now` minus the elapsed time, or null when the text cannot be parsed
 */
const uptimeToStartTimestamp = (uptime: string, now: number = Date.now()): number | null => {
    const trimmed = uptime.trim()
    if (trimmed === '') return null

    const parts = trimmed.split(':').map(Number)
    if (parts.length > 3 || parts.some(part => !Number.isFinite(part) || part < 0)) return null

    // Fold right-aligned fields into a total number of seconds.
    const elapsedSeconds = parts.reduce((total, part) => total * 60 + part, 0)
    return now - elapsedSeconds * 1000
}

/**
 * Scrapes Twitch channel pages with Puppeteer and queries streamlink for
 * live status and available qualities.
 */
export class TwitchScraper {
    /** Public map keyed by string; nothing in this class reads or writes it. */
    public cache: Map<string, StreamerData> = new Map()

    /** Guards against registering the adblocker plugin more than once. */
    private static adblockerRegistered = false

    constructor() {
        // puppeteer-extra is a shared module-level instance, so the plugin is
        // registered once no matter how many scrapers are constructed.
        if (!TwitchScraper.adblockerRegistered) {
            puppeteer.use(new PuppeteerExtraPluginAdblocker({
                blockTrackersAndAnnoyances: true
            }))
            TwitchScraper.adblockerRegistered = true
        }
    }

    /** Builds the channel URL, encoding the username so it cannot alter the path. */
    private channelUrl = (username: string): string =>
        `https://twitch.tv/${encodeURIComponent(username)}`

    /** Converts a displayed follower count ("1.2K", "950") to a number, or null if unparseable. */
    private abbreviatedNumberToNumber = (num: string): number | null => parseAbbreviatedNumber(num)

    // See https://advancedweb.hu/how-to-speed-up-puppeteer-scraping-with-parallelization/
    /**
     * Launches a headless browser, passes it to `fn` and resolves with its
     * result. The browser is closed afterwards whether `fn` succeeds or throws.
     */
    private withBrowser = async <T>(fn: (browser: Browser) => Promise<T>): Promise<T> => {
        const browser = await puppeteer.launch({
            headless: true,
            args: ['--no-sandbox']
        })
        try {
            return await fn(browser)
        } finally {
            await browser.close()
        }
    }

    /**
     * Returns a runner bound to `browser` that opens a new page, passes it to
     * `fn` and resolves with its result. The page is closed afterwards whether
     * `fn` succeeds or throws.
     */
    private withPage = (browser: Browser) => async <T>(fn: (page: Page) => Promise<T>): Promise<T> => {
        const page = await browser.newPage()
        try {
            return await fn(page)
        } finally {
            await page.close()
        }
    }

    /**
     * Scrapes tags, title, topic and start time of the live stream shown on `page`.
     * `qualities` is not filled in here; getStreamerData adds it afterwards.
     *
     * @returns the stream details, or null when the channel is offline or the
     *          uptime text is missing or unparseable
     */
    private getStreamData = async (page: Page, isLive: boolean): Promise<StreamData | null> => {
        if (!isLive) return null

        const tags: string[] = await page.$$eval('.eUxEWt * span', elements => elements.map(el => el.innerHTML))
        const title: string = await page.$eval('h2.CoreText-sc-1txzju1-0', element => element.innerText)
        const topic = await page.$eval('.hfMGmo', element => element.textContent)

        // Uptime is displayed as H:MM:SS.
        const liveTime = await page.$eval('.live-time', element => element.textContent)
        const startedAt = liveTime ? uptimeToStartTimestamp(liveTime) : null
        if (startedAt === null) return null

        const streamData: LooseObject = { tags, title, topic, startedAt }
        return streamData as StreamData
    }

    /**
     * Scrapes follower count, about text, social links and profile picture
     * from the channel shown on `page`. When the channel is offline the about
     * tab button is clicked first.
     *
     * @returns the scraped fields cast to StreamerData; `username`, `isLive`
     *          and `stream` are not set here
     */
    private getAboutData = async (page: Page, isLive: boolean) => {
        const aboutData: LooseObject = {}

        if (!isLive) {
            // Get data from the about page.
            const aboutPageButtonSelector = 'li.InjectLayout-sc-1i43xsx-0:nth-child(2) > a:nth-child(1) > div:nth-child(1) > div:nth-child(1) > p:nth-child(1)'
            await page.click(aboutPageButtonSelector)
        }
        await page.waitForSelector('.kuAEke')

        const followers = await page.$eval('.kuAEke', element => element.innerHTML)
        aboutData.followersAbbv = followers
        aboutData.followers = this.abbreviatedNumberToNumber(followers)

        aboutData.about = await page.$eval('.kLFSJC', element => element.innerHTML)

        // The callback runs inside the page, so it must not reference anything
        // from this module's scope.
        const socials: Socials[] = await page.$$eval('.ccXeNc * a', elements => elements.map((el) => {
            const validHosts = ['instagram', 'youtube', 'discord', 'tiktok', 'twitter']

            // Check every hostname label so subdomains such as m.youtube.com
            // or vm.tiktok.com are still recognised.
            let labels: string[] = []
            try {
                labels = new URL(el.href).hostname.toLowerCase().split('.')
            } catch (err) {
                labels = []
            }
            const type: string | null = labels.find(label => validHosts.includes(label)) || null

            return {
                type,
                link: el.href,
                text: el.innerText
            }
        }))
        aboutData.socials = socials

        const profilePictureSelector = 'figure.ScAvatar-sc-144b42z-0:nth-child(2) > img:nth-child(1)'
        const profilePicture = await page.$eval(profilePictureSelector, element => element.getAttribute('src'))
        aboutData.pfp = profilePicture

        return aboutData as StreamerData
    }

    /**
     * Collects the full profile of a streamer: live status and qualities from
     * streamlink, everything else scraped from the Twitch channel page.
     *
     * @param username Twitch login name
     * @returns the assembled profile
     * @throws Error when the channel page does not load with an OK response
     */
    public getStreamerData = async (username: string): Promise<StreamerData> => {
        const isLive = await this.isLive(username)

        // withBrowser/withPage close the page and browser in their finally
        // blocks, so nothing is closed explicitly here.
        const scraped = await this.withBrowser((browser) =>
            this.withPage(browser)(async (page) => {
                const res = await page.goto(this.channelUrl(username))
                if (!res?.ok()) return null

                return Promise.all([this.getStreamData(page, isLive), this.getAboutData(page, isLive)])
            })
        )

        if (scraped === null) {
            throw new Error(`Failed to load the Twitch page for "${username}"`)
        }

        const [stream, about] = scraped
        const recoveredData: LooseObject = { ...about }
        recoveredData.stream = stream

        // Add the information that does not come from the page itself.
        if (stream && isLive) {
            recoveredData.stream.qualities = await this.getQualities(username)
        }
        recoveredData.isLive = isLive
        recoveredData.username = username

        return recoveredData as StreamerData
    }

    /** Asks streamlink whether the channel is currently live. */
    public isLive = async (username: string) => {
        const streamlink = new Streamlink(this.channelUrl(username), {})
        return await streamlink.isLive()
    }

    /** Asks streamlink for the qualities available on the channel's stream. */
    public getQualities = async (username: string) => {
        const streamlink = new Streamlink(this.channelUrl(username), {})
        return await streamlink.getQualities()
    }
}
|
Loading…
x
Reference in New Issue
Block a user