Not able to run the word count program as a application using spark-submit command


#1

File “/home/girishbommisetty/project/csds-spark-emr-master/wordcount.py”, line 24, in
.reduceByKey(add)
File “/usr/hdp/current/spark-client/python/lib/pyspark.zip/pyspark/rdd.py”, line 1558, in reduceByKey
File “/usr/hdp/current/spark-client/python/lib/pyspark.zip/pyspark/rdd.py”, line 1768, in combineByKey
File “/usr/hdp/current/spark-client/python/lib/pyspark.zip/pyspark/rdd.py”, line 2169, in _defaultReducePartitions
File “/usr/hdp/current/spark-client/python/lib/pyspark.zip/pyspark/rdd.py”, line 2363, in getNumPartitions
File “/usr/hdp/current/spark-client/python/lib/py4j-0.9-src.zip/py4j/java_gateway.py”, line 813, in call
File “/usr/hdp/current/spark-client/python/lib/py4j-0.9-src.zip/py4j/protocol.py”, line 308, in get_return_value
py4j.protocol.Py4JJavaError: An error occurred while calling o47.partitions.
: java.lang.IllegalArgumentException: java.net.UnknownHostException: user

my code is below:

command used : spark-submit --master yarn wordcount.py | tee output.txt

import sys
from operator import add
from pyspark import SparkConf, SparkContext

#for SparkConf() check out http://spark.apache.org/docs/latest/configuration.html
conf = (SparkConf()
.setMaster(“yarn-client”)
.setAppName(“WordCounter”)
.set(“spark.executor.memory”, “1g”))
sc = SparkContext(conf = conf)

print(“Launch App…”)
if name == “main”:
print(“Initiating main…”)

    inputFile = "hdfs://user/girishbommisetty/input.txt"
    print("Counting words in ", inputFile)
    lines = sc.textFile(inputFile)

    #for lambdas check out https://docs.python.org/3/tutorial/controlflow.html#lambda-expressions
    lines_nonempty = lines.filter( lambda x: len(x) > 0 )
    counts = lines_nonempty.flatMap(lambda x: x.split(' ')) \
                  .map(lambda x: (x, 1)) \
                  .reduceByKey(add)
    output = counts.collect()
    for (word, count) in output:
        print("%s: %i" % (word, count))

    sc.stop()

#2

Give HDFS full path and try now

inputFile = “hdfs://nn01.itversity.com/user/girishbommisetty/input.txt”