apache spark - NegativeArraySizeException while training prediction io universal recommender -
I am trying to deploy a PredictionIO system and am getting a NegativeArraySizeException during the training phase. Any help is appreciated.
I have verified that the events I pushed have entityType "user" and targetEntityType "item":
http://localhost:7070/events.json?accessKey=<myAccessKey>

[{
  "eventId": "aax2w8b2ufaxuydlzyigbgaaavgabv1uhz7erglatba",
  "event": "purchase",
  "entityType": "user",
  "entityId": "b571c84da7104d339a436b40d07ba59c",
  "targetEntityType": "item",
  "targetEntityId": "00572208a2e742f397f7e082aa40ae2e",
  "properties": {},
  "eventTime": "2016-10-26T08:05:01.422Z",
  "creationTime": "2016-10-26T08:05:01.423Z"
}]

[INFO] [Engine] Extracting datasource params...
[INFO] [WorkflowUtils$] No 'name' found. Default empty string used.
[INFO] [Engine] Datasource params: (,DataSourceParams(juggernautrecommendor,List(purchase, view)))
[INFO] [Engine] Extracting preparator params...
[INFO] [Engine] Preparator params: (,Empty)
[INFO] [Engine] Extracting serving params...
[INFO] [Engine] Serving params: (,Empty)
[INFO] [Remoting] Starting remoting
[INFO] [Remoting] Remoting started; listening on addresses :[akka.tcp://sparkDriver@172.17.0.2:34162]
[WARN] [MetricsSystem] Using default name DAGScheduler for source because spark.app.id is not set.
[INFO] [Engine$] EngineWorkflow.train
[INFO] [Engine$] DataSource: com.juggernaut.DataSource@5c1b89ac
[INFO] [Engine$] Preparator: com.juggernaut.Preparator@2b79c8ff
[INFO] [Engine$] AlgorithmList: List(com.juggernaut.URAlgorithm@5d14e99e)
[INFO] [Engine$] Data sanity check is on.
[INFO] [Engine$] com.juggernaut.TrainingData does not support data sanity check. Skipping check.
[INFO] [Engine$] com.juggernaut.PreparedData does not support data sanity check. Skipping check.
[INFO] [URAlgorithm] Actions read now creating correlators
[ERROR] [Executor] Exception in task 0.0 in stage 29.0 (TID 20)
[WARN] [TaskSetManager] Lost task 0.0 in stage 29.0 (TID 20, localhost): java.lang.NegativeArraySizeException
    at org.apache.mahout.math.DenseVector.<init>(DenseVector.java:57)
    at org.apache.mahout.sparkbindings.SparkEngine$$anonfun$5.apply(SparkEngine.scala:78)
    at org.apache.mahout.sparkbindings.SparkEngine$$anonfun$5.apply(SparkEngine.scala:77)
    at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$17.apply(RDD.scala:706)
    at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$17.apply(RDD.scala:706)
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:297)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:264)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66)
    at org.apache.spark.scheduler.Task.run(Task.scala:88)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
    at java.lang.Thread.run(Thread.java:745)
[ERROR] [TaskSetManager] Task 0 in stage 29.0 failed 1 times; aborting job
Exception in thread "main" org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 29.0 failed 1 times, most recent failure: Lost task 0.0 in stage 29.0 (TID 20, localhost): java.lang.NegativeArraySizeException
    at org.apache.mahout.math.DenseVector.<init>(DenseVector.java:57)
    at org.apache.mahout.sparkbindings.SparkEngine$$anonfun$5.apply(SparkEngine.scala:78)
    at org.apache.mahout.sparkbindings.SparkEngine$$anonfun$5.apply(SparkEngine.scala:77)
    at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$17.apply(RDD.scala:706)
    at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$17.apply(RDD.scala:706)
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:297)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:264)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66)
    at org.apache.spark.scheduler.Task.run(Task.scala:88)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
    at java.lang.Thread.run(Thread.java:745)

Driver stacktrace:
    at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1283)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1271)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1270)
    at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
    at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1270)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:697)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:697)
    at scala.Option.foreach(Option.scala:236)
    at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:697)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1496)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1458)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1447)
    at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
    at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:567)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1822)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1942)
    at org.apache.spark.rdd.RDD$$anonfun$reduce$1.apply(RDD.scala:1003)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:108)
    at org.apache.spark.rdd.RDD.withScope(RDD.scala:306)
    at org.apache.spark.rdd.RDD.reduce(RDD.scala:985)
    at org.apache.mahout.sparkbindings.SparkEngine$.numNonZeroElementsPerColumn(SparkEngine.scala:86)
    at org.apache.mahout.math.drm.CheckpointedOps.numNonZeroElementsPerColumn(CheckpointedOps.scala:37)
    at org.apache.mahout.math.cf.SimilarityAnalysis$.sampleDownAndBinarize(SimilarityAnalysis.scala:286)
    at org.apache.mahout.math.cf.SimilarityAnalysis$$anonfun$cooccurrences$1.apply(SimilarityAnalysis.scala:89)
    at org.apache.mahout.math.cf.SimilarityAnalysis$$anonfun$cooccurrences$1.apply(SimilarityAnalysis.scala:84)
    at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33)
    at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:108)
    at org.apache.mahout.math.cf.SimilarityAnalysis$.cooccurrences(SimilarityAnalysis.scala:84)
    at org.apache.mahout.math.cf.SimilarityAnalysis$.cooccurrencesIDSs(SimilarityAnalysis.scala:141)
    at com.juggernaut.URAlgorithm.calcAll(URAlgorithm.scala:143)
    at com.juggernaut.URAlgorithm.train(URAlgorithm.scala:117)
    at com.juggernaut.URAlgorithm.train(URAlgorithm.scala:102)
    at io.prediction.controller.P2LAlgorithm.trainBase(P2LAlgorithm.scala:46)
    at io.prediction.controller.Engine$$anonfun$18.apply(Engine.scala:689)
    at io.prediction.controller.Engine$$anonfun$18.apply(Engine.scala:689)
    at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244)
    at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244)
    at scala.collection.immutable.List.foreach(List.scala:318)
    at scala.collection.TraversableLike$class.map(TraversableLike.scala:244)
    at scala.collection.AbstractTraversable.map(Traversable.scala:105)
    at io.prediction.controller.Engine$.train(Engine.scala:689)
    at io.prediction.controller.Engine.train(Engine.scala:174)
    at io.prediction.workflow.CoreWorkflow$.runTrain(CoreWorkflow.scala:65)
    at io.prediction.workflow.CreateWorkflow$.main(CreateWorkflow.scala:247)
    at io.prediction.workflow.CreateWorkflow.main(CreateWorkflow.scala)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:672)
    at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:180)
    at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:205)
    at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:120)
    at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.lang.NegativeArraySizeException
    at org.apache.mahout.math.DenseVector.<init>(DenseVector.java:57)
    at org.apache.mahout.sparkbindings.SparkEngine$$anonfun$5.apply(SparkEngine.scala:78)
    at org.apache.mahout.sparkbindings.SparkEngine$$anonfun$5.apply(SparkEngine.scala:77)
    at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$17.apply(RDD.scala:706)
    at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$17.apply(RDD.scala:706)
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:297)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:264)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66)
    at org.apache.spark.scheduler.Task.run(Task.scala:88)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
    at java.lang.Thread.run(Thread.java:745)
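For reference, the DataSourceParams line in the log implies a datasource section in engine.json roughly like the following. The actual file is not shown in the question, so this is a sketch reconstructed from the log (the appName value is inferred):

"datasource": {
  "params": {
    "appName": "juggernautrecommendor",
    "eventNames": ["purchase", "view"]
  }
}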
All event names defined in engine.json must have at least one event in the data set. The trace above fails inside Mahout's numNonZeroElementsPerColumn while SimilarityAnalysis builds the co-occurrence matrices, which is consistent with one of the configured event types ("purchase" or "view") having no events at all.
https://groups.google.com/forum/#!topic/predictionio-user/fdgoy4discg
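A quick way to satisfy this is to make sure each configured event name has been posted at least once before running pio train. A minimal sketch using the Event Server endpoint from the question (the access key, IDs, and timestamp are placeholders):

curl -i -X POST "http://localhost:7070/events.json?accessKey=<myAccessKey>" \
  -H "Content-Type: application/json" \
  -d '{
    "event": "view",
    "entityType": "user",
    "entityId": "b571c84da7104d339a436b40d07ba59c",
    "targetEntityType": "item",
    "targetEntityId": "00572208a2e742f397f7e082aa40ae2e",
    "eventTime": "2016-10-26T08:05:01.422Z"
  }'

With at least one "purchase" and one "view" event in the app, none of the matrices fed to SimilarityAnalysis.cooccurrencesIDSs should be empty, and the DenseVector constructor in the trace should no longer receive a negative size.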