Im new to Python and Im trying to code a commentscraper for youtube with the most important informations, which I put in a JSON-file. But the my amount of comments and replys is not the same as on Youtube. I don't know, where my error is. I recognized, that it doesn't write any data in the files, if there are less than 20 comments, but I don't know, where I have to change something...
Example:
https://youtu.be/Re1m9O7q-9U here I get 102, but it should be 107
https://youtu.be/Q9Y5m1fQ7Fk here I get 423, but it should be 486
https://youtu.be/cMhE5BfmFkM here I get 1315, but it should be 2052
Here is the code:
class YT_Comments:def __init__(self, api_key): self.api_key = api_key self.comment_int = 0def get_video_comments(self, video_id, limit): url = f"https://youtube.googleapis.com/youtube/v3/commentThreads?part=replies%2C%20snippet&order=relevance&videoId={video_id}&key={self.api_key}" vid_comments = [] pc, npt = self._get_comments_per_page(url) if limit is not None and isinstance(limit, int): url += f"&maxResults={str(limit)}" while (npt is not None): nexturl = url +"&pageToken=" + npt pc, npt = self._get_comments_per_page(nexturl) vid_comments.append(pc) print(self.comment_int) print(len(vid_comments)) return vid_commentsdef _get_comments_per_page(self, url): json_url = requests.get(url) data = json.loads(json_url.text) page_comments = [] if "items" not in data: return page_comments, None item_data = data["items"] nextPageToken = data.get("nextPageToken", None) for item in tqdm.tqdm(item_data): try: kind = item["kind"] if kind == "youtube#comment" or "youtube#commentThread": comment_text = item["snippet"]["topLevelComment"]["snippet"]["textOriginal"] comment_author = item["snippet"]["topLevelComment"]["snippet"]["authorDisplayName"] author_id = item["snippet"]["topLevelComment"]["snippet"]["authorChannelId"]["value"] comment_like_count = item["snippet"]["topLevelComment"]["snippet"]["likeCount"] comment_date = item["snippet"]["topLevelComment"]["snippet"]["publishedAt"] comment = {"comment_text" : comment_text,"comment_author" : comment_author,"comment_author_id" : author_id,"comment_like_count" : comment_like_count,"comment_date" : comment_date} replies_l = [] self.comment_int += 1 try: replies = item["replies"]["comments"] for reply in replies: reply_txt = reply["snippet"]["textOriginal"] reply_author = reply["snippet"]["authorDisplayName"] reply_author_id = reply["snippet"]["authorChannelId"]["value"] reply_like_count = reply["snippet"]["likeCount"] reply_date = reply["snippet"]["publishedAt"] reply_dict = {"text" : reply_txt,"author" : reply_author,"author_id" : reply_author_id,"likes" : reply_like_count,"date" : reply_date} replies_l.append(reply_dict) self.comment_int +=1 except KeyError: replies_l.append(None) comment_dict = {"comment": comment,"replies": replies_l, } page_comments.append(comment_dict) except KeyError: print("No Comments") return page_comments, nextPageToken