How to launch Pyspark2 shell with avro package in labs?

scala> orders.write.format(“avro”).save("/user/dnagarat/order_avro_t")
java.lang.NoSuchMethodError: org.apache.spark.sql.internal.SQLConf.avroCompressionCodec()Ljava/lang/String;
at org.apache.spark.sql.avro.AvroOptions$$anonfun$5.apply(AvroOptions.scala:80)
at org.apache.spark.sql.avro.AvroOptions$$anonfun$5.apply(AvroOptions.scala:80)
at scala.Option.getOrElse(Option.scala:121)
at org.apache.spark.sql.avro.AvroOptions.(AvroOptions.scala:80)
at org.apache.spark.sql.avro.AvroOptions.(AvroOptions.scala:34)
at org.apache.spark.sql.avro.AvroFileFormat.prepareWrite(AvroFileFormat.scala:115)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:140)
at org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:154)
at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:104)
at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:102)
at org.apache.spark.sql.execution.command.DataWritingCommandExec.doExecute(commands.scala:122)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:131)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:127)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:155)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127)
at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:80)
at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:80)
at org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:654)
at org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:654)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:77)
at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:654)
at org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:273)
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:267)
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:225)
… 49 elided

Hi @vijaysathisdheeba_ro,

please launch your spark shell with avro package by using below command-
spark2-shell --master yarn --conf spark.ui.port=0 --packages com.databricks:spark-avro_2.11:4.0.0

to save datain avro format use-
orders.write.format("com.databricks.spark.avro").save("/hdfs_dir_name/../")

1 Like

Thanks Shubam, This works out !