Quantcast
Channel: Active questions tagged youtube-api - Stack Overflow
Viewing all articles
Browse latest Browse all 3831

How to get more than 500 comments of Youtube video using Youtube Data API

$
0
0

I've tried get Youtube comments using Youtube Data API dan Python, but i still can't catch all the comments, as example a video have 36,000 comments but only catch 20,000 comments.

I tried it two ways, in this first way the comment retrieval process took a long time, would be faster if the ```if "nextPageToken" in response....`` is not use.

def get_comments(youtube, video_id, token):  komentar = []  request = youtube.commentThreads().list(part='snippet, replies', videoId= video_id, maxResults = 10)  response = request.execute()  for idx, cmt in enumerate(response['items']):    topLvl = dict(        videoId = cmt['snippet']['videoId'],        commentId = cmt['snippet']['topLevelComment']['id'],        username = cmt['snippet']['topLevelComment']['snippet']['authorDisplayName'],        comment = cmt['snippet']['topLevelComment']['snippet']['textDisplay'],        likeCount = cmt['snippet']['topLevelComment']['snippet']['likeCount'],        publishedAt = cmt['snippet']['topLevelComment']['snippet']['publishedAt'])    komentar.append(topLvl)    if 'replies' in response['items'][idx]:      for v in range(len(response['items'][idx]['replies']['comments'])):        replies1 = dict(            video_id = cmt['replies']['comments'][v]['snippet']['videoId'],                        commentId = cmt['replies']['comments'][v]['id'],                        parentId = cmt['replies']['comments'][v]['snippet']['parentId'],                        username = cmt['replies']['comments'][v]['snippet']['authorDisplayName'],                        comment = cmt['replies']['comments'][v]['snippet']['textDisplay'],                        likeCount = cmt['replies']['comments'][v]['snippet']['likeCount'],                        publishedAt = cmt['replies']['comments'][v]['snippet']['publishedAt'])        # print("rep 1: ", replies1)        komentar.append(replies1)    totalReplyCount = cmt['snippet']['totalReplyCount']    replies_response=youtube.comments().list(part='snippet',maxResults=100,parentId=cmt['id']).execute()    for indx, reply in enumerate(replies_response['items']):      # print("pp ", reply['snippet']['authorDisplayName'])      replies2 = dict(          # video_id = reply['videoId'],                    commentId = reply['id'],                    parentId = reply['snippet']['parentId'],                    username = reply['snippet']['authorDisplayName'],                    comment = reply['snippet']['textDisplay'],                    likeCount = reply['snippet']['likeCount'],                    publishedAt = reply['snippet']['publishedAt'])      # print("rep 2: ", replies2)      komentar.append(replies2)      while "nextPageToken" in replies_response:        token_reply = replies_response['nextPageToken']        replies_response=youtube.comments().list(part='snippet',maxResults=100,parentId=cmt['id'],pageToken=token_reply).execute()        for indx, reply in enumerate(replies_response['items']):          replies3 = dict(              commentId = reply['id'],                        parentId = reply['snippet']['parentId'],                        username = reply['snippet']['authorDisplayName'],                        comment = reply['snippet']['textDisplay'],                        likeCount = reply['snippet']['likeCount'],                        publishedAt = reply['snippet']['publishedAt'])          # print("rep 3: ", replies3)          komentar.append(replies3)  if "nextPageToken" in response:     return get_comments(youtube, video_id, response['nextPageToken'])   else:     komentar = [x for x in komentar if len(x) > 0]    return []  return komentar

This second method has many lines of code, even repetitive. But in this way can get more comment data

def video_comments(youtube, video_ids):        all_comments = []        request = youtube.commentThreads().list(part='snippet, replies', videoId= video_id, maxResults = 100)        response = request.execute()        for i in range(0, len(response['items'])):            data = dict(                video_id = response['items'][i]['snippet']['videoId'],                commentId = response['items'][i]['snippet']['topLevelComment']['id'],                username = response['items'][i]['snippet']['topLevelComment']['snippet']['authorDisplayName'],                comment = response['items'][i]['snippet']['topLevelComment']['snippet']['textDisplay'],                like = response['items'][i]['snippet']['topLevelComment']['snippet']['likeCount'],                totalReply = response['items'][i]['snippet']['totalReplyCount'],                publishedAt = response['items'][i]['snippet']['topLevelComment']['snippet']['publishedAt'])            if 'replies' in response['items'][i]:                for v in range(len(response['items'][i]['replies']['comments'])):                    dataReplies = dict(                            video_id = response['items'][i]['replies']['comments'][v]['snippet']['videoId'],                            commentId = response['items'][i]['replies']['comments'][v]['id'],                            username = response['items'][i]['replies']['comments'][v]['snippet']['authorDisplayName'],                            comment = response['items'][i]['replies']['comments'][v]['snippet']['textDisplay'],                            like = response['items'][i]['replies']['comments'][v]['snippet']['likeCount'],                            publishedAt = response['items'][i]['replies']['comments'][v]['snippet']['publishedAt']                    )                    all_comments.append(dataReplies)            all_comments.append(data)        next_page_token = response.get('nextPageToken')        more_pages = True        while more_pages:            if next_page_token is None:                more_pages = False            else:                request = youtube.commentThreads().list(part='snippet, replies', videoId=video_id, maxResults = 100, pageToken = next_page_token)                response = request.execute()                for i in range(0, len(response['items'])):                    data = dict(                        video_id = response['items'][i]['snippet']['videoId'],                        commentId = response['items'][i]['snippet']['topLevelComment']['id'],                        username = response['items'][i]['snippet']['topLevelComment']['snippet']['authorDisplayName'],                        comment = response['items'][i]['snippet']['topLevelComment']['snippet']['textDisplay'],                        like = response['items'][i]['snippet']['topLevelComment']['snippet']['likeCount'],                        totalReply = response['items'][i]['snippet']['totalReplyCount'],                        publishedAt = response['items'][i]['snippet']['topLevelComment']['snippet']['publishedAt'])                    if 'replies' in response['items'][i]:                        for v in range(len(response['items'][i]['replies']['comments'])):                            dataReplies = dict(                                    video_id = response['items'][i]['replies']['comments'][v]['snippet']['videoId'],                                    commentId = response['items'][i]['replies']['comments'][v]['id'],                                    username = response['items'][i]['replies']['comments'][v]['snippet']['authorDisplayName'],                                    comment = response['items'][i]['replies']['comments'][v]['snippet']['textDisplay'],                                    like = response['items'][i]['replies']['comments'][v]['snippet']['likeCount'],                                    publishedAt = response['items'][i]['replies']['comments'][v]['snippet']['publishedAt']                            )                            all_comments.append(dataReplies)                    all_comments.append(data)                next_page_token = response.get('nextPageToken')        return (all_comments)

Help me, this is my first time scraping data


Viewing all articles
Browse latest Browse all 3831

Trending Articles