python - simple scraper kodi addon syntax failure regex "list exceeds" -


i trying simple kodi addon running limited code knowledge have.

i took plugin chefkoch ( github.com/kodinerds/repo/blob…deo.chefkoch_de-2.0.7.zip ) example , rebuilt it, scrapes "http://www.multimedia.ethz.ch/speakers/d_arch" , gives me list of semester child pages "class = "sub-lev3" ". when click links want to list of videos in semester. can until point of choosing semester in "def listvideos" appears have problem.

i think first 3 lines 1 make problems, tells list exceed failures in kodi log.

   content = geturl(url)        tbody = re.search('<tbody>(.*?)</tbody>', content, re.dotall).group(1)        spl = tbody.split('<tr>')     in range(1, len(spl), 1):             entry = spl[i]             match = re.compile('<li class=\'video\'><a href=\'(.+?)\'', re.dotall).findall(entry)             url = match[0]             match = re.compile('<span>(.+?)<', re.dotall).findall(entry)             title = cleantitle(match[0])             addlink(title, url, 'playvideo', '', '') 

the whole default.py code

#!/usr/bin/python     # -*- coding: utf-8 -*-     import urllib     import urllib2     import socket     import sys     import re     import xbmcplugin     import xbmcgui     import xbmcaddon     socket.setdefaulttimeout(30)     pluginhandle = int(sys.argv[1])     settings = xbmcaddon.addon(id='plugin.video.architektur')     translation = settings.getlocalizedstring     forceviewmode = settings.getsetting("forceviewmode") == "true"     viewmode = str(settings.getsetting("viewmode"))     baseurl = "http://www.multimedia.ethz.ch/speakers/d_arch/"     def index():       content = geturl(baseurl)       spl = content.split('class="subnav-lev3"')       in range(1, len(spl), 1):             entry = spl[i]             match = re.compile('href="(.+?)"', re.dotall).findall(entry)             url = match[0]             match = re.compile('title=".+?">(.+?)<', re.dotall).findall(entry)             title = cleantitle(match[0])             adddir(title, url, 'listvideos', '')       xbmcplugin.endofdirectory(pluginhandle)       if forceviewmode:             xbmc.executebuiltin('container.setviewmode('+viewmode+')')     def listvideos(url):        content = geturl(url)        tbody = re.search('<tbody>(.*?)</tbody>', content, re.dotall).group(1)        spl = tbody.split('<tr>')        in range(1, len(spl), 1):             entry = spl[i]             match = re.compile('<li class=\'video\'><a href=\'(.+?)\'', re.dotall).findall(entry)             url = match[0]             match = re.compile('<span>(.+?)<', re.dotall).findall(entry)             title = cleantitle(match[0])             addlink(title, url, 'playvideo', '', '')        xbmcplugin.endofdirectory(pluginhandle)        if forceviewmode:             xbmc.executebuiltin('container.setviewmode('+viewmode+')')     def playvideo(url):         listitem = xbmcgui.listitem(path=url)         xbmcplugin.setresolvedurl(pluginhandle, true, listitem)     def cleantitle(title):         title = title.replace("<", "<").replace(">", ">").replace("&", "&").replace("'", "\\").replace(""", "\"").replace("ß", "ß").replace("–", "-")         title = title.replace("Ä", "Ä").replace("Ü", "Ü").replace("Ö", "Ö").replace("ä", "ä").replace("ü", "ü").replace("ö", "ö")         title = title.strip()         return title     def geturl(url):         req = urllib2.request(url)         req.add_header('user-agent', 'mozilla/5.0 (windows nt 6.1; rv:22.0) gecko/20100101 firefox/22.0')         response = urllib2.urlopen(req)         link = response.read()         response.close()         return link     def parameters_string_to_dict(parameters):         ''' convert parameters encoded in url dict. '''         paramdict = {}         if parameters:             parampairs = parameters[1:].split("&")             paramspair in parampairs:                 paramsplits = paramspair.split('=')                 if (len(paramsplits)) == 2:                     paramdict[paramsplits[0]] = paramsplits[1]         return paramdict     def addlink(name, url, mode, iconimage, desc=""):         u = sys.argv[0]+"?url="+urllib.quote_plus(url)+"&mode="+str(mode)         ok = true         liz = xbmcgui.listitem(name, iconimage="defaultvideo.png", thumbnailimage=iconimage)         liz.setinfo(type="video", infolabels={"title": name, "plot": desc})         liz.setproperty('isplayable', 'true')         ok = xbmcplugin.adddirectoryitem(handle=int(sys.argv[1]), url=u, listitem=liz)         return ok     def adddir(name, url, mode, iconimage):         u = sys.argv[0]+"?url="+urllib.quote_plus(url)+"&mode="+str(mode)         ok = true         liz = xbmcgui.listitem(name, iconimage="defaultfolder.png", thumbnailimage=iconimage)         liz.setinfo(type="video", infolabels={"title": name})         ok = xbmcplugin.adddirectoryitem(handle=int(sys.argv[1]), url=u, listitem=liz, isfolder=true)         return ok     params = parameters_string_to_dict(sys.argv[2])     mode = urllib.unquote_plus(params.get('mode', ''))     url = urllib.unquote_plus(params.get('url', ''))     if mode == 'listvideos':         listvideos(url)     elif mode == 'playvideo':         playvideo(url)     else:         index() 

can tell me doing wrong in syntax?


and when code working next project same homepage: tube.tugraz.at/engage/ui/brows…e&category=courses&page=1 here cant use source code of webpage , have go through pagination pages.

do have idea start one?

cheers


Comments

Popular posts from this blog

python - How to insert QWidgets in the middle of a Layout? -

python - serve multiple gunicorn django instances under nginx ubuntu -

module - Prestashop displayPaymentReturn hook url -