Unable to perform data frame operations on DF

dataframes

#1

scala> val dF = sqlContext.sql("select * from employeeTable").collect()
dF: Array[org.apache.spark.sql.Row] = Array([id,name,age,gender,level,salary], [1,Joseph,23,m,1,50000], [2,Sharma,25,m,1,55000], [3,Varma,26,m,2,60000], [4,Aj,27,m,3,65000], [5,Varun,22,m,1,45000], [6,Ajay,29,m,3,95000], [7,Vijay,31,m,4,125000], [8,Kaushik,33,m,5,145000], [9,Gopi,21,m,1,25000], [10,Kumar,27,m,3,75000], [11,Kumari,21,f,1,35000], [12,Tina,22,f,2,45000], [13,Alexa,23,f,3,55000], [14,Casey,25,f,1,25000])

scala> dF.show()
:28: error: value show is not a member of Array[org.apache.spark.sql.Row]
dF.show()
^

scala> dF.printSchema()
:28: error: value printSchema is not a member of Array[org.apache.spark.sql.Row]
dF.printSchema()
^

scala> dF.head()
:28: error: not enough arguments for method apply: (i: Int)Any in trait Row.
Unspecified value parameter i.
dF.head()
^

scala> dF.head(1)
res3: Any = name

scala> dF.head(5)
res4: Any = salary

scala> dF.describe()
:28: error: value describe is not a member of Array[org.apache.spark.sql.Row]
dF.describe()
^

scala> dF.count()
:28: error: not enough arguments for method count: (p: org.apache.spark.sql.Row => Boolean)Int.
Unspecified value parameter p.
dF.count()
^

scala> dF.count(5)
:28: error: type mismatch;
found : Int(5)
required: org.apache.spark.sql.Row => Boolean
dF.count(5)
^

scala> dF.distinct()
:28: error: not enough arguments for method apply: (i: Int)org.apache.spark.sql.Row in class Array.
Unspecified value parameter i.
dF.distinct()
^

scala> dF.collect()
:28: error: not enough arguments for method collect: (pf: PartialFunction[org.apache.spark.sql.Row,B])(implicit bf: scala.collection.generic.CanBuildFrom[Array[org.apache.spark.sql.Row],B,That])That.
Unspecified value parameter pf.
dF.collect()
^

scala> dF.head(3)
res10: Any = gender


#2

val dF = sqlContext.sql("select * from employeeTable").collect()

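Remove the collect() call from your statement. collect() returns an Array[org.apache.spark.sql.Row] rather than a DataFrame, which is why DataFrame methods such as show, printSchema and describe are reported as "not a member of Array[org.apache.spark.sql.Row]".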

val dF = sqlContext.sql("select * from employeeTable")
Now dF is a DataFrame, so the DataFrame operations should work:
dF.show
dF.count
dF.printSchema


#3

Thanks for the help @naveenraj