NumberFormatException in DataFrame code

apache-spark

#1

I have added the file ls2014.tsv to my project locally. Now when I try to run the following code, I get the following NumberFormatException for all numeric fields (e.g. the int and float ones):

Error: Caused by: java.lang.NumberFormatException: For input string: "age"

package sparkdemo

import com.typesafe.config.ConfigFactory

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext

import org.apache.hadoop.fs._
import org.apache.spark.sql._
import org.apache.spark.sql.functions._

object Electrionresults {

  case class lsv2014(
    state: String,
    constituency: String,
    candidate_name: String,
    sex: String,
    age: String,
    category: String,
    partyname: String,
    partysymbol: String,
    general: String,
    postal: Int,
    total: Int,
    pct_of_total_votes: Float,
    pct_of_polled_votes: Float,
    totalvoters: Int
  )

  def main(args: Array[String]): Unit = {

    val conf = new SparkConf().setMaster("local").setAppName("Spark SQL Demo")
    val sc = new SparkContext(conf)
    val sqlContext = new SQLContext(sc)
    sqlContext.setConf("spark.sql.shuffle.partitions", "2")
    import sqlContext.implicits._

    val ls2014DF = sc.textFile("ls2014.tsv").map(rec => {
      val a = rec.split("\\t")
      lsv2014(a(0).toString, a(1).toString, a(2).toString, a(3).toString, a(4).toInt,
        a(5).toString, a(6).toString, a(7).toString, a(8).toString,
        a(9).toInt, a(10).toInt,
        a(11).toFloat, a(12).toFloat,
        a(13).toInt)
    }).toDF()

    ls2014DF.registerTempTable("results")

    sqlContext.sql("select * from results limit 10").foreach(println)
  }
}
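
For reference, Scala's .toInt delegates to java.lang.Integer.parseInt, which throws exactly this exception whenever the input string is not a number. A minimal standalone reproduction (the value "age" here is just the string from the error message):

// .toInt delegates to java.lang.Integer.parseInt under the hood
"age".toInt   // throws java.lang.NumberFormatException: For input string: "age"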


#2

@pragtyag:

In the case class lsv2014 you defined age: String, but in ls2014DF you convert it with a(4).toInt.

Change it to String in the DataFrame code as well; I guess this is the error.

BTW, in the DataFrame code you do need to explicitly cast with .toInt when the case class field is an Int.
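
To illustrate with a hypothetical two-field example (not your actual schema), the field type in the case class and the conversion applied in the map must agree:

// Hypothetical example: the declared type and the parsed value must match.
case class Person(name: String, age: Int)

val rec = "Alice\t30"
val a = rec.split("\\t")
val p = Person(a(0), a(1).toInt)   // a(1).toInt matches age: Int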

Thanks
Venkat


#3

Hello Venkat,

By mistake I pasted the wrong code. This is the actual code, where the integer fields are typed as Int in the case class and are also converted with .toInt in the main object logic. It still gives the same error. On the contrary, if I change the types to String in the case class and apply .toString in the object, it works fine.

package sparkdemo

import com.typesafe.config.ConfigFactory

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext

import org.apache.hadoop.fs._
import org.apache.spark.sql._
import org.apache.spark.sql.functions._

object Electrionresults {

  case class lsv2014(
    state: String,
    constituency: String,
    candidate_name: String,
    sex: String,
    age: Int,
    category: String,
    partyname: String,
    partysymbol: String,
    general: String,
    postal: Int,
    total: Int,
    pct_of_total_votes: Float,
    pct_of_polled_votes: Float,
    totalvoters: Int
  )

  def main(args: Array[String]): Unit = {

    val conf = new SparkConf().setMaster("local").setAppName("Spark SQL Demo")
    val sc = new SparkContext(conf)
    val sqlContext = new SQLContext(sc)
    sqlContext.setConf("spark.sql.shuffle.partitions", "2")
    import sqlContext.implicits._

    val ls2014DF = sc.textFile("ls2014.tsv").map(rec => {
      val a = rec.split("\\t")
      lsv2014(a(0).toString, a(1).toString, a(2).toString, a(3).toString, a(4).toInt,
        a(5).toString, a(6).toString, a(7).toString, a(8).toString,
        a(9).toInt, a(10).toInt,
        a(11).toFloat, a(12).toFloat,
        a(13).toInt)
    }).toDF()

    ls2014DF.registerTempTable("results")

    sqlContext.sql("select * from results limit 10").foreach(println)
  }
}
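
A note on the remaining error: the exception reports the literal string "age", which is one of the column names, so the header line of ls2014.tsv is most likely being parsed as a data record. That would also explain why everything works when the fields are typed as String: "age" is a perfectly valid String but not a valid Int. A minimal sketch to drop the header before mapping, assuming the file's first line is the header row:

// Hypothetical fix, assuming ls2014.tsv starts with a header line.
val raw = sc.textFile("ls2014.tsv")
val header = raw.first()                      // grab the header line
val data = raw.filter(rec => rec != header)   // keep only the data records
// ...then apply the existing map(...).toDF() to `data` instead of the raw RDD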