I'm working on a python script based on this excellent example fromCan't download video captions using youtube API v3 in pythonfrom wescpy.I had it working fine but introduced some additional features such as reading from a file of ids to process.But now when I run the code I get a data type error which I really can't track down:
caption_data = caption_str.split('\n\n')TypeError: a bytes-like object is required, not 'str'My code is below and I have no idea why the API data is retuning the "wrong" data type.
#!/usr/bin/env python3from __future__ import print_functionimport sysfrom apiclient import discoveryfrom httplib2 import Httpfrom oauth2client import file, client, toolsimport requests#import pybot_1 - to be used to send notificationsSCOPES = 'https://www.googleapis.com/auth/youtube.force-ssl'store = file.Storage('mycreds.json')creds = store.get()if not creds or creds.invalid: flow = client.flow_from_clientsecrets('client_secrets.json', SCOPES) creds = tools.run_flow(flow, store)YOUTUBE = discovery.build('youtube', 'v3', http=creds.authorize(Http()))def process(vid, vfile): caption_info = YOUTUBE.captions().list( part='id', videoId=vid).execute().get('items', []) # id and etag etc #print(caption_info) # Get the raw captions data caption_str = YOUTUBE.captions().download( id=caption_info[0]['id'], tfmt='srt').execute() #raw data #print(caption_str) caption_data = caption_str.split('\n\n') caption_txt="" #writes captions to file for local copy #caption_file = open('/captions_files/'+ vfile +'_'+ vid +'.txt', 'w') for line in caption_data: if line.count('\n') > 1: i, cap_time, caption = line.split('\n', 2) caption_txt +=(''.join(caption.split()) +'\n') #print(''.join(caption.split()), file=caption_file) #print(caption_txt) #print(caption_txt, file=caption_file) mystory = caption_txt.replace('\n', '<br />') url = "mycmsform.html" data = {"ytid":VID, "headline": 'AUTOYT -'+ vfile +' ID- '+ VID , "captions": mystory} headers = {'User-Agent': 'mycustomapp', 'X-Requested-With': 'XMLHttpRequest' } r= requests.post(url,data=data) return r #except: # return "error"if __name__ == '__main__': import sys thefile = 'NOT_CAPTIONED_IDS.txt' file1 = open(thefile, 'r') lines = file1.readlines() file1.close() #print(lines) linescopy= lines count = 0 fresult="<Response [200]>" for pair in lines: count += 1 vidstring = (pair.strip()) #print(vidstring) vidid= pair.split(':',1) vfile=vidid[1].strip() VID=str(vidid[0].strip()) #print('vfile: '+ vfile) #print('VID: '+ VID) if len(VID) >=3: xy=process(VID, vfile) else: continue #print(xy) if str(xy) == fresult: file1 = open(thefile, 'w') #print(count-1) #remove the process item from the list linescopy.pop(count-1) print("CAPTIONS ***\n Captions posted for: " + vfile) # do something to notify the team tmsg = ('(YTU) CAPTIONS: '+ vfile +': Uploaded to CMS ') # SEND TELEGRAM MESSAGE commented out for now #pybot_1.tgmessage(tmsg) linesasstr=','.join(linescopy) #Need to figure out how to write one list item per line for listitem in linescopy: file1. write('%s\n' % listitem) file1.close() elif str(xy) == "error": #THERE WAS A PROBLEM OR NO CAPTIONS YET - need to differentiate pass else: #Nothing to process write the list back to the file file1 = open(thefile, 'w') for listitem in linescopy: file1. write('%s\n' % listitem) file1.close()As requested:
print(caption_str)b"1\n00:00:01,850 --> 00:00:20,129\n[Music]\n\n2\n00:00:16,800 --> 00:00:20,129\n[Applause]\n\n3\n00:00:28,800 --> 00:00:31,439\nactually so\n\n4\n00:00:30,320 --> 00:00:33,680\nit's\n\n5\n00:00:31,439 --> 00:00:35,520\nactually the usual trick that they used\n\n6\n00:00:33,680 --> 00:00:37,760\non juno series they had the chorus to\n\n7\n00:00:35,520 --> 00:00:40,160\nspread out the voices now we here we\n\n8\n00:00:37,760 --> 00:00:41,760\nhave the reverb that's that's because\n\n9\n00:00:40,160 --> 00:00:44,320\nit's a single oscillator\n\n10\n00:00:41,760 --> 00:00:47,039\nso i've got to ask you because typhon\n\n11\n00:00:44,320 --> 00:00:49,039\njust got usb audio right is there usb\n\n12\n00:00:47,039 --> 00:00:51,039\naudio on this uh\n\n13\n00:00:49,039 --> 00:00:54,160\nwe almost had it\n\n14\n00:00:51,039 --> 00:00:56,559\nthen some small disks came up but we're\n\n15\n00:00:54,160 --> 00:00:58,559\ngonna have it delivered maybe\n\n16\n00:00:56,559 --> 00:01:00,239\nin a month or so and would that give you\n\n17\n00:00:58,559 --> 00:01:02,800\nthe opportunity to maybe bring this\n\n18\n00:01:00,239 --> 00:01:04,960\nstereo back uh stereo in the reverb only\n\n19\n00:01:02,800 --> 00:01:07,840\nrather than have this here here too yeah\n\n20\n00:01:04,960 --> 00:01:09,760\noh no usb audio here uh okay yeah we do\n\n21\n00:01:07,840 --> 00:01:13,119\nwe don't have a\n\n22\n00:01:09,760 --> 00:01:15,920\ncodec an audio codec to send out the\n\n23\n00:01:13,119 --> 00:01:18,320\nthe audio via usb we can only have midi\n\n24\n00:01:15,920 --> 00:01:20,479\nright okay all right fair enough so\n\n25\n00:01:18,320 --> 00:01:22,159\nonly on the output it's a very pretty\n\n26\n00:01:20,479 --> 00:01:23,280\nthing uh dimitra's done a lovely job of\n\n27\n00:01:22,159 --> 00:01:24,840\nthe uh\n\n28\n00:01:23,280 --> 00:01:27,840\nof the front\n\n29\n00:01:24,840 --> 00:01:29,520\npanel yeah so um\n\n30\n00:01:27,840 --> 00:01:30,880\nwhen and i mean you know everybody's\n\n31\n00:01:29,520 --> 00:01:32,720\nstruggling to make stuff because of\n\n32\n00:01:30,880 --> 00:01:34,880\nchips and all that when you when you\n\n33\n00:01:32,720 --> 00:01:37,439\nanticipate this being out uh in 40 days\n\n34\n00:01:34,880 --> 00:01:39,439\ni think it will be at subs 40 for 50\n\n35\n00:01:37,439 --> 00:01:41,119\ndays let's say something like that and\n\n36\n00:01:39,439 --> 00:01:43,600\nwhat sort of price do you go for it's\n\n37\n00:01:41,119 --> 00:01:46,640\n500 euros\n\n38\n00:01:43,600 --> 00:01:49,520\ngood work um i did my best i really\n\n39\n00:01:46,640 --> 00:01:53,280\ncannot go any lower than this i hope\n\n40\n00:01:49,520 --> 00:01:55,920\npeople buy it and be happy with it\n\n41\n00:01:53,280 --> 00:01:57,680\nand have fun yeah sounds lovely okay\n\n42\n00:01:55,920 --> 00:02:00,240\nwell i mean maybe a couple more presets\n\n43\n00:01:57,680 --> 00:02:02,880\nand then we can say goodbye okay\n\n44\n00:02:00,240 --> 00:02:05,360\nlet's try something more\n\n45\n00:02:02,880 --> 00:02:05,360\nsubtile\n\n46\n00:02:07,040 --> 00:02:11,360\nnow let's try\n\n47\n00:02:08,879 --> 00:02:11,360\nthis one\n\n48\n00:02:11,430 --> 00:02:19,160\n[Music]\n\n49\n00:02:15,440 --> 00:02:19,160\nmore resin is here\n\n50\n00:02:26,560 --> 00:02:32,239\nlet's find the sword presets\n\n51\n00:02:30,640 --> 00:02:35,400\nthere\n\n52\n00:02:32,239 --> 00:02:35,400\nsome brass\n\n53\n00:02:39,599 --> 00:02:42,870\nperfect just for the german umpire\n\n54\n00:02:41,360 --> 00:02:45,200\nmarket yeah yeah yeah\n\n55\n00:02:42,870 --> 00:02:46,720\n[Music]\n\n56\n00:02:45,200 --> 00:02:49,100\nor\n\n57\n00:02:46,720 --> 00:02:50,400\nlet's try here\n\n58\n00:02:49,100 --> 00:02:52,140\n[Music]\n\n59\n00:02:50,400 --> 00:02:56,720\na more deep preset\n\n60\n00:02:52,140 --> 00:02:59,599\n[Music]\n\n61\n00:02:56,720 --> 00:03:00,400\nso each preset can store the voice mode\n\n62\n00:02:59,599 --> 00:03:02,080\nas well\n\n63\n00:03:00,400 --> 00:03:03,040\nyes of course you can store the voice\n\n64\n00:03:02,080 --> 00:03:05,360\nmode\n\n65\n00:03:03,040 --> 00:03:07,680\nyou also have chords that you can use in\n\n66\n00:03:05,360 --> 00:03:10,560\nstack modes where it's in a unison try\n\n67\n00:03:07,680 --> 00:03:12,640\nmode and do mode and you can edit all\n\n68\n00:03:10,560 --> 00:03:14,560\nthe chords for each\n\n69\n00:03:12,640 --> 00:03:16,840\npreset and you have seven different\n\n70\n00:03:14,560 --> 00:03:19,120\nchords which by the way you can\n\n71\n00:03:16,840 --> 00:03:22,239\nmodulate let me show that yeah that\n\n72\n00:03:19,120 --> 00:03:24,959\nsounds nice yeah so let's go to a unison\n\n73\n00:03:22,239 --> 00:03:27,380\nmode and\n\n74\n00:03:24,959 --> 00:03:31,569\nanother chord\n\n75\n00:03:27,380 --> 00:03:31,569\n[Music]\n\n76\n00:03:32,000 --> 00:03:34,879\nbecause it's unison\n\n77\n00:03:33,680 --> 00:03:37,760\nthere\n\n78\n00:03:34,879 --> 00:03:39,599\nso we have our chord and then i go\n\n79\n00:03:37,760 --> 00:03:40,959\nchoose lfo2\n\n80\n00:03:39,599 --> 00:03:45,159\nand\n\n81\n00:03:40,959 --> 00:03:45,159\nmodulate the chord\n\n82\n00:03:45,820 --> 00:03:55,120\n[Music]\n\n83\n00:03:53,920 --> 00:03:56,319\ninteresting\n\n84\n00:03:55,120 --> 00:03:57,680\nor\n\n85\n00:03:56,319 --> 00:03:59,760\nmaybe you should hear that another\n\n86\n00:03:57,680 --> 00:04:02,200\npreset but you get the point and you can\n\n87\n00:03:59,760 --> 00:04:05,350\nalso have like glide control on that\n\n88\n00:04:02,200 --> 00:04:05,350\n[Music]\n\n89\n00:04:07,280 --> 00:04:10,959\noh nice\n\n90\n00:04:08,879 --> 00:04:14,599\nnot the best sound example maybe because\n\n91\n00:04:10,959 --> 00:04:14,599\nwe need some sustain\n\n92\n00:04:14,959 --> 00:04:18,639\nthere you go\n\n93\n00:04:17,120 --> 00:04:22,000\nyou can actually modulate everything\n\n94\n00:04:18,639 --> 00:04:23,680\nwith helium too anything you like every\n\n95\n00:04:22,000 --> 00:04:25,759\naspect here let's\n\n96\n00:04:23,680 --> 00:04:28,759\nand is that in\n\n97\n00:04:25,759 --> 00:04:28,759\nis\n\n98\n00:04:28,780 --> 00:04:33,520\n[Music]\n\n99\n00:04:31,680 --> 00:04:35,919\nyou can choose that there is a mode\n\n100\n00:04:33,520 --> 00:04:38,080\nwhere there is a menu\n\n101\n00:04:35,919 --> 00:04:40,320\nway to choose if it's unipolar or\n\n102\n00:04:38,080 --> 00:04:41,759\nbipolar you can also choose its rate if\n\n103\n00:04:40,320 --> 00:04:44,560\nit's high rate low rate is that per\n\n104\n00:04:41,759 --> 00:04:46,720\nparameter or just a global per parameter\n\n105\n00:04:44,560 --> 00:04:49,199\noh nice per parameter\n\n106\n00:04:46,720 --> 00:04:50,880\nthe rate is global yes but the level\n\n107\n00:04:49,199 --> 00:04:51,680\nit's per parameter\n\n108\n00:04:50,880 --> 00:04:53,199\nso\n\n109\n00:04:51,680 --> 00:04:54,560\nyeah\n\n110\n00:04:53,199 --> 00:04:55,759\nand you can have key tracking of that\n\n111\n00:04:54,560 --> 00:04:56,880\ntoo it's\n\n112\n00:04:55,759 --> 00:04:58,720\nyou can get pretty complex on\n\n113\n00:04:56,880 --> 00:05:00,800\nmodulations and you can do the same\n\n114\n00:04:58,720 --> 00:05:02,160\nthing for modulation wheel velocity and\n\n115\n00:05:00,800 --> 00:05:04,320\nafter that so\n\n116\n00:05:02,160 --> 00:05:05,600\nif i go and choose modulation wheel and\n\n117\n00:05:04,320 --> 00:05:08,560\ni see\n\n118\n00:05:05,600 --> 00:05:12,080\ni want to control the\n\n119\n00:05:08,560 --> 00:05:16,320\nthe cutoff and the resonance and\n\n120\n00:05:12,080 --> 00:05:16,320\nand the tuna mode so\n\n121\n00:05:20,800 --> 00:05:25,039\ninteresting yeah it was an interesting\n\n122\n00:05:22,960 --> 00:05:28,000\nsound not the best one but you get the\n\n123\n00:05:25,039 --> 00:05:29,759\npoint yeah fantastic so well this sounds\n\n124\n00:05:28,000 --> 00:05:30,800\nexcellent i mean uh wish you all the\n\n125\n00:05:29,759 --> 00:05:34,039\nbest with it\n\n126\n00:05:30,800 --> 00:05:34,039\nthank you\n\n127\n00:05:36,770 --> 00:05:44,550\n[Music]\n\n128\n00:05:50,750 --> 00:06:12,400\n[Music]\n\n129\n00:06:10,319 --> 00:06:12,400\nyou\n\n"EDIT I found that in Python 3 strings are preceded by the b identifier which seems to make them behave differently than in Python 2.7This script runs fine on 2.7. But I guess it would be good to know how to deal with the issue in Python 3