I am new to the Python YouTube API, and for a homework assignment I want to make a simple analysis of YouTube channels.
The script should take a bunch of random channels, get the total number of views and the total number of videos for each, and print them out.
I am pretty decent with Python, but none of the YouTube API videos/docs explain how to do this, so if anyone can help I'd really appreciate that!
thanks
Script to get the number of videos:
import scrapetube

# Iterable of the channel's videos; each entry is indexed by 'videoId' below.
videos = scrapetube.get_channel("UCX6OQ3DkcsbYNE6H8uQQuVA")

# Collect the ID of every video, then report how many there are.
list_of_videos = [video['videoId'] for video in videos]
print(len(list_of_videos))

# Script to get the amount of views (by Python Engineer):
import json

import requests
from tqdm import tqdm


class YTstats:
    """Collect channel statistics and per-video data from the YouTube Data API v3.

    Typical use: construct with an API key and a channel ID, call
    ``extract_all()``, then ``dump()`` to write everything to a JSON file.
    """

    # Seconds to wait for the API before giving up; without a timeout
    # ``requests.get`` can block indefinitely on a stalled connection.
    request_timeout = 10

    def __init__(self, api_key, channel_id):
        """Store the credentials; no request is made until a getter is called."""
        self.api_key = api_key
        self.channel_id = channel_id
        self.channel_statistics = None  # filled by get_channel_statistics()
        self.video_data = None  # filled by get_channel_video_data()

    def extract_all(self):
        """Fetch the channel statistics and then the data of its videos."""
        self.get_channel_statistics()
        self.get_channel_video_data()

    def _get_json(self, url):
        """Send a GET request to *url* and return the decoded JSON body."""
        response = requests.get(url, timeout=self.request_timeout)
        return json.loads(response.text)

    def get_channel_statistics(self):
        """Extract the channel statistics.

        Returns:
            The ``statistics`` dict of the channel, or an empty dict when the
            response does not contain one. The result is also stored in
            ``self.channel_statistics``.
        """
        print('get channel statistics...')
        url = f'https://www.googleapis.com/youtube/v3/channels?part=statistics&id={self.channel_id}&key={self.api_key}'
        # The context manager closes the progress bar even if the request raises.
        with tqdm(total=1) as pbar:
            data = self._get_json(url)
            try:
                data = data['items'][0]['statistics']
            except (KeyError, IndexError):
                # Missing 'items'/'statistics' key, or an empty 'items' list.
                print('Could not get channel statistics')
                data = {}
            self.channel_statistics = data
            pbar.update()
        return data

    def get_channel_video_data(self):
        """Extract all video information of the channel.

        Looks up the channel's videos, then issues one request per video and
        part to enrich each entry.

        Returns:
            Dict mapping video ID to the merged data of that video. The result
            is also stored in ``self.video_data``.
        """
        print('get video data...')
        # Playlists are returned as well but are not used here.
        channel_videos, _ = self._get_channel_content(limit=50)

        parts = ["snippet", "statistics", "contentDetails", "topicDetails"]
        for video_id in tqdm(channel_videos):
            for part in parts:
                data = self._get_single_video_data(video_id, part)
                channel_videos[video_id].update(data)

        self.video_data = channel_videos
        return channel_videos

    def _get_single_video_data(self, video_id, part):
        """Extract further information for a single video.

        Args:
            video_id: ID of the video to look up.
            part: One of 'snippet', 'statistics', 'contentDetails',
                'topicDetails'.

        Returns:
            The requested part as a dict, or an empty dict when the response
            does not contain it.
        """
        url = f"https://www.googleapis.com/youtube/v3/videos?part={part}&id={video_id}&key={self.api_key}"
        data = self._get_json(url)
        try:
            data = data['items'][0][part]
        except (KeyError, IndexError):
            # Missing key, or an empty 'items' list.
            print(f'Error! Could not get {part} part of data: \n{data}')
            data = dict()
        return data

    def _get_channel_content(self, limit=None, check_all_pages=True):
        """Extract all videos and playlists of the channel.

        Args:
            limit: Optional int passed to the API as ``maxResults``.
            check_all_pages: When true, follow ``nextPageToken`` to further
                result pages (at most 10 pages after the first one).

        Returns:
            Tuple ``(channel_videos, channel_playlists)`` where
            channel_videos maps videoId -> {'publishedAt', 'title'} and
            channel_playlists maps playlistId -> {'publishedAt', 'title'}.
        """
        url = f"https://www.googleapis.com/youtube/v3/search?key={self.api_key}&channelId={self.channel_id}&part=snippet,id&order=date"
        if isinstance(limit, int):
            url += "&maxResults=" + str(limit)

        vid, pl, npt = self._get_channel_content_per_page(url)
        idx = 0
        # Cap the number of follow-up pages so the loop always terminates.
        while check_all_pages and npt is not None and idx < 10:
            nexturl = url + "&pageToken=" + npt
            next_vid, next_pl, npt = self._get_channel_content_per_page(nexturl)
            vid.update(next_vid)
            pl.update(next_pl)
            idx += 1

        return vid, pl

    def _get_channel_content_per_page(self, url):
        """Extract all videos and playlists of one result page.

        Returns:
            Tuple ``(channel_videos, channel_playlists, nextPageToken)``;
            the token is None when the response has none or carries no
            'items'.
        """
        data = self._get_json(url)
        channel_videos = dict()
        channel_playlists = dict()
        if 'items' not in data:
            print('Error! Could not get correct channel data!\n', data)
            # Return two distinct dicts so callers never alias videos and
            # playlists.
            return channel_videos, channel_playlists, None

        nextPageToken = data.get("nextPageToken", None)

        for item in data['items']:
            try:
                kind = item['id']['kind']
                published_at = item['snippet']['publishedAt']
                title = item['snippet']['title']
                if kind == 'youtube#video':
                    video_id = item['id']['videoId']
                    channel_videos[video_id] = {'publishedAt': published_at, 'title': title}
                elif kind == 'youtube#playlist':
                    playlist_id = item['id']['playlistId']
                    channel_playlists[playlist_id] = {'publishedAt': published_at, 'title': title}
            except KeyError:
                # Skip malformed items but keep processing the rest of the page.
                print('Error! Could not extract data from item:\n', item)

        return channel_videos, channel_playlists, nextPageToken

    def dump(self):
        """Dump channel statistics and video data into a single JSON file.

        The file is named after the channel title (lower-cased, spaces
        replaced by underscores), falling back to the channel ID when no
        video carries a 'channelTitle'. Does nothing but print a hint when
        the data has not been fetched yet.
        """
        if self.channel_statistics is None or self.video_data is None:
            print('data is missing!\nCall get_channel_statistics() and get_channel_video_data() first!')
            return

        fused_data = {self.channel_id: {"channel_statistics": self.channel_statistics, "video_data": self.video_data}}

        # Read the title without popitem(): popping would drop a video from
        # both self.video_data and the dumped file, and fail on an empty dict.
        channel_title = next(
            (video['channelTitle'] for video in self.video_data.values() if 'channelTitle' in video),
            self.channel_id,
        )
        channel_title = channel_title.replace(" ", "_").lower()
        filename = channel_title + '.json'
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(fused_data, f, indent=4)

        print('file dumped to', filename)