Python - simple Kodi scraper addon: syntax failure in regex parsing ("list index out of range")
I am trying to get a simple Kodi addon running with the limited coding knowledge I have.
I took the Chefkoch plugin ( github.com/kodinerds/repo/blob…deo.chefkoch_de-2.0.7.zip ) as an example and rebuilt it so that it scrapes "http://www.multimedia.ethz.ch/speakers/d_arch" and gives me a list of the semester child pages (class="sub-lev3"). When I click one of those links, I want to get a list of the videos in that semester. It works up to the point of choosing a semester; "def listvideos" appears to have the problem.
I think one of the first three lines causes the problem; the Kodi log reports list index errors ("list index out of range").
content = geturl(url)
# NOTE: re.search() returns None when the page has no <tbody>, in which case
# .group(1) raises AttributeError.
tbody = re.search('<tbody>(.*?)</tbody>', content, re.DOTALL).group(1)
spl = tbody.split('<tr>')
# Element 0 is the markup before the first <tr>, so the loop starts at 1.
for i in range(1, len(spl), 1):
    entry = spl[i]
    match = re.compile('<li class=\'video\'><a href=\'(.+?)\'', re.DOTALL).findall(entry)
    # NOTE: findall() returns [] for a row without a matching video link, so
    # match[0] raises "IndexError: list index out of range" on such rows.
    url = match[0]
    match = re.compile('<span>(.+?)<', re.DOTALL).findall(entry)
    title = cleantitle(match[0])
    addlink(title, url, 'playvideo', '', '')
The whole default.py code:
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Kodi video plugin that scrapes lecture recordings from multimedia.ethz.ch.

The script reads its base URL from ``sys.argv[0]``, the plugin handle from
``sys.argv[1]`` and the query string from ``sys.argv[2]``.  The ``mode`` query
parameter selects what is shown:

* no mode      -> ``index()``: one folder per semester page
* listvideos   -> ``listvideos(url)``: the videos found on a semester page
* playvideo    -> ``playvideo(url)``: resolve the selected video for playback
"""
import re
import socket
import sys
import urllib
import urllib2

import xbmc
import xbmcaddon
import xbmcgui
import xbmcplugin

socket.setdefaulttimeout(30)
pluginhandle = int(sys.argv[1])
settings = xbmcaddon.Addon(id='plugin.video.architektur')
translation = settings.getLocalizedString
forceviewmode = settings.getSetting("forceviewmode") == "true"
viewmode = str(settings.getSetting("viewmode"))
baseurl = "http://www.multimedia.ethz.ch/speakers/d_arch/"

# Patterns are compiled once here instead of on every loop iteration.
NAV_HREF_RE = re.compile(r'href="(.+?)"', re.DOTALL)
NAV_TITLE_RE = re.compile(r'title=".+?">(.+?)<', re.DOTALL)
TBODY_RE = re.compile(r'<tbody>(.*?)</tbody>', re.DOTALL)
VIDEO_HREF_RE = re.compile(r"<li class='video'><a href='(.+?)'", re.DOTALL)
VIDEO_TITLE_RE = re.compile(r'<span>(.+?)<', re.DOTALL)

# HTML entities decoded by cleantitle().  "&amp;" comes last so that an
# escaped entity such as "&amp;lt;" is not decoded twice.
HTML_ENTITIES = (
    ("&lt;", "<"),
    ("&gt;", ">"),
    ("&#039;", "'"),
    ("&quot;", "\""),
    ("&szlig;", "ß"),
    ("&ndash;", "-"),
    ("&Auml;", "Ä"),
    ("&Uuml;", "Ü"),
    ("&Ouml;", "Ö"),
    ("&auml;", "ä"),
    ("&uuml;", "ü"),
    ("&ouml;", "ö"),
    ("&amp;", "&"),
)


def _set_view_mode():
    """Switch the container to the configured view mode when forced."""
    if forceviewmode:
        xbmc.executebuiltin('Container.SetViewMode(' + viewmode + ')')


def index():
    """Add one folder per semester link found on the overview page."""
    content = geturl(baseurl)
    # Element 0 is the markup before the first semester link, so skip it.
    for entry in content.split('class="subnav-lev3"')[1:]:
        links = NAV_HREF_RE.findall(entry)
        titles = NAV_TITLE_RE.findall(entry)
        # findall() returns [] when nothing matches; indexing [0] on that
        # would raise "IndexError: list index out of range".
        if not links or not titles:
            continue
        adddir(cleantitle(titles[0]), links[0], 'listvideos', '')
    xbmcplugin.endOfDirectory(pluginhandle)
    _set_view_mode()


def listvideos(url):
    """Add one playable item per table row of ``url`` that links a video.

    Rows without a video link or without a title are skipped, and a page
    without a ``<tbody>`` yields an empty listing instead of an exception.
    """
    content = geturl(url)
    table = TBODY_RE.search(content)
    if table is not None:
        # Element 0 is the markup before the first <tr>, so skip it.
        for entry in table.group(1).split('<tr>')[1:]:
            links = VIDEO_HREF_RE.findall(entry)
            titles = VIDEO_TITLE_RE.findall(entry)
            if not links or not titles:
                continue
            addlink(cleantitle(titles[0]), links[0], 'playvideo', '', '')
    xbmcplugin.endOfDirectory(pluginhandle)
    _set_view_mode()


def playvideo(url):
    """Hand ``url`` to Kodi as the resolved path of the selected item."""
    listitem = xbmcgui.ListItem(path=url)
    xbmcplugin.setResolvedUrl(pluginhandle, True, listitem)


def cleantitle(title):
    """Return ``title`` with common HTML entities decoded and stripped."""
    for entity, char in HTML_ENTITIES:
        title = title.replace(entity, char)
    return title.strip()


def geturl(url):
    """Fetch ``url`` with a browser user agent and return the response body."""
    req = urllib2.Request(url)
    req.add_header(
        'User-Agent',
        'Mozilla/5.0 (Windows NT 6.1; rv:22.0) Gecko/20100101 Firefox/22.0')
    response = urllib2.urlopen(req)
    try:
        return response.read()
    finally:
        # Close the connection even if read() raises.
        response.close()


def parameters_string_to_dict(parameters):
    """Convert the parameters encoded in a URL query string to a dict.

    The first character of ``parameters`` (the leading "?") is dropped.
    Pairs that do not split into exactly one key and one value are ignored.
    """
    paramdict = {}
    if parameters:
        for pair in parameters[1:].split("&"):
            parts = pair.split('=')
            if len(parts) == 2:
                paramdict[parts[0]] = parts[1]
    return paramdict


def addlink(name, url, mode, iconimage, desc=""):
    """Add a playable item that calls this plugin back with ``url``/``mode``.

    Returns the result of ``xbmcplugin.addDirectoryItem``.
    """
    u = sys.argv[0] + "?url=" + urllib.quote_plus(url) + "&mode=" + str(mode)
    liz = xbmcgui.ListItem(name, iconImage="DefaultVideo.png",
                           thumbnailImage=iconimage)
    liz.setInfo(type="video", infoLabels={"title": name, "plot": desc})
    liz.setProperty('IsPlayable', 'true')
    return xbmcplugin.addDirectoryItem(handle=int(sys.argv[1]), url=u,
                                       listitem=liz)


def adddir(name, url, mode, iconimage):
    """Add a folder item that calls this plugin back with ``url``/``mode``.

    Returns the result of ``xbmcplugin.addDirectoryItem``.
    """
    u = sys.argv[0] + "?url=" + urllib.quote_plus(url) + "&mode=" + str(mode)
    liz = xbmcgui.ListItem(name, iconImage="DefaultFolder.png",
                           thumbnailImage=iconimage)
    liz.setInfo(type="video", infoLabels={"title": name})
    return xbmcplugin.addDirectoryItem(handle=int(sys.argv[1]), url=u,
                                       listitem=liz, isFolder=True)


params = parameters_string_to_dict(sys.argv[2])
mode = urllib.unquote_plus(params.get('mode', ''))
url = urllib.unquote_plus(params.get('url', ''))

if mode == 'listvideos':
    listvideos(url)
elif mode == 'playvideo':
    playvideo(url)
else:
    index()
Can anyone tell me what I am doing wrong in the syntax?
And once this code is working, my next project is the same thing for this homepage: tube.tugraz.at/engage/ui/brows…e&category=courses&page=1 . There I can't use the source code of the webpage and have to go through the paginated pages.
Do you have any idea how to start on that one?
cheers
Comments
Post a Comment