Scala - Collection exercise - UP Election results analysis

scala

#1

Hi everyone…

I’ve completed all 5 parts of the Scala Collections exercise and thought of sharing them here on Discuss (a quick search didn’t find a single post with all of them), so this might help others cross-check their solutions. This is a first take, and I may refine it as I learn more about Scala & Spark (I’m still exploring other Scala functions/methods that were not covered in the Scala teaching videos), so please be gentle about the long solutions :slight_smile:

package demo
import scala.io.Source

/** Command-line analyses over a tab-separated election results file.
  *
  * Every record is split on tabs and the following columns are read:
  * 0 = state, 1 = constituency, 2 = candidate name, 6 = party, 10 = votes.
  * The first line of the file is treated as a header and skipped.
  *
  * Each exercise is implemented as a pure function over the data lines (so it
  * can be checked without the file) plus a printing entry point that keeps the
  * original name and signature.
  */
object Elections {

  val fileName = "/Users/ArunKumar/Downloads/ls2014.tsv"

  // Column positions inside a tab-separated record.
  private val StateCol = 0
  private val ConstituencyCol = 1
  private val CandidateCol = 2
  private val PartyCol = 6
  private val VotesCol = 10

  // Candidate-name value that marks a "None of the Above" record.
  private val NotaName = "None of the Above"

  private val EndBanner = "^^^^^^^^^^ End of RESULTS ^^^^^^^^^^^^^^"

  // Read lazily so that merely referencing the object does not touch the disk,
  // materialised once so every analysis sees all rows, and closed after reading
  // so the file handle is not leaked.
  private lazy val dataLines: List[String] = {
    val source = Source.fromFile(fileName)
    try source.getLines().drop(1).toList
    finally source.close()
  }

  /** A fresh iterator over the data lines (header excluded) on every call.
    *
    * An `Iterator` stored in a `val` can be traversed only once; handing out a
    * new one each time lets several analyses run in the same JVM.
    */
  def results: Iterator[String] = dataLines.iterator

  // Splits every non-blank line exactly once into its tab-separated columns.
  private def rows(lines: Seq[String]): List[Array[String]] =
    lines.iterator.filter(_.trim.nonEmpty).map(_.split("\t")).toList

  /** Total "None of the Above" votes per state, highest total first
    * (ties ordered by state name).
    *
    * @param lines data lines without the header
    * @return (state, NOTA votes) pairs
    */
  def notaTotalsByState(lines: Seq[String]): List[(String, Int)] =
    rows(lines)
      .filter(r => r(CandidateCol) == NotaName)
      .map(r => (r(StateCol), r(VotesCol).toInt))
      .groupBy(_._1)
      .map { case (state, recs) => (state, recs.map(_._2).sum) }
      .toList
      .sortBy { case (state, total) => (-total, state) }

  /** The winners with the largest margin over the runner-up of their constituency.
    *
    * Constituencies with fewer than two records have no runner-up and are
    * skipped. One entry is produced per constituency, so a candidate who wins
    * several constituencies appears once for each of them.
    *
    * @param lines data lines without the header
    * @param n     maximum number of entries to return
    * @return (winner name, margin) pairs, largest margin first (ties ordered by name)
    */
  def topMargins(lines: Seq[String], n: Int = 10): List[(String, Int)] =
    rows(lines)
      .map(r => (r(StateCol), r(ConstituencyCol), r(CandidateCol), r(VotesCol).toInt))
      .groupBy(r => (r._1, r._2))
      .values
      .map(candidates => candidates.sortBy(c => -c._4).take(2))
      .collect { case List(winner, runnerUp) => (winner._3, winner._4 - runnerUp._4) }
      .toList
      .sortBy { case (name, margin) => (-margin, name) }
      .take(n)

  /** Percentage of all polled votes obtained by each party, ordered by party name.
    *
    * Votes are accumulated as `Long` so that large totals cannot overflow.
    * Returns an empty list when there are no votes at all, because the
    * percentage is undefined in that case.
    *
    * @param lines data lines without the header
    * @return (party, percentage in the range 0..100) pairs
    */
  def voteShareByParty(lines: Seq[String]): List[(String, Double)] = {
    val partyVotes = rows(lines).map(r => (r(PartyCol), r(VotesCol).toLong))
    val totalVotes = partyVotes.map(_._2).sum
    if (totalVotes == 0L) Nil
    else
      partyVotes
        .groupBy(_._1)
        .map { case (party, recs) => (party, recs.map(_._2).sum.toDouble / totalVotes * 100) }
        .toList
        .sortBy(_._1)
  }

  /** All distinct constituency names.
    *
    * @param lines data lines without the header
    */
  def distinctConstituencies(lines: Seq[String]): Set[String] =
    rows(lines).map(r => r(ConstituencyCol)).toSet

  /** Number of distinct constituencies per state, largest count first
    * (ties ordered by state name).
    *
    * @param lines data lines without the header
    * @return (state, constituency count) pairs
    */
  def constituencyCountByState(lines: Seq[String]): List[(String, Int)] =
    rows(lines)
      .map(r => (r(StateCol), r(ConstituencyCol)))
      .distinct
      .groupBy(_._1)
      .map { case (state, pairs) => (state, pairs.size) }
      .toList
      .sortBy { case (state, count) => (-count, state) }

  /** Number of constituencies won by each party, per state.
    *
    * The winner of a constituency is the record with the most votes; when two
    * records tie, the one that appears later in the input wins.
    *
    * @param lines data lines without the header
    * @return state -> (party -> seats won)
    */
  def seatTally(lines: Seq[String]): Map[String, Map[String, Int]] =
    rows(lines)
      .map(r => (r(StateCol), r(ConstituencyCol), r(PartyCol), r(VotesCol).toInt))
      .groupBy(_._1)
      .map { case (state, stateRows) =>
        val winningParties = stateRows
          .groupBy(_._2)
          .values
          .map(candidates => candidates.reduce((a, b) => if (a._4 > b._4) a else b)._3)
          .toList
        val seats = winningParties.groupBy(identity).map { case (party, wins) => (party, wins.size) }
        (state, seats)
      }

  // Get number of “None of the above (NOTA)” for each state
  def getNOTAByState: Unit = {
    println("^^^^^^^^^^ TASK:: Get number of “None of the above (NOTA)” for each state")
    notaTotalsByState(results.toList).foreach { case (state, total) => println(state + "\t" + total) }
    println(EndBanner)
  }

  // Exercise 5 – Get top 10 candidates by margin (number of votes for winner – number of votes for 1st runner)
  def top10ByMargin: Unit = {
    println("^^^^^^^^^^ Exercise 5:: Get top 10 candidates by margin (number of votes for winner – number of votes for 1st runner")
    topMargins(results.toList).foreach { case (name, margin) => println(name + " ==>> " + margin) }
    println(EndBanner)
  }

  // Exercise 4 – Get the percentage of polled votes of each party – formula (number of votes per party across all the constituencies / total number of votes all the constituencies)
  def voteShare: Unit = {
    println("^^^^^^^^^^ Exercise 4:: Get the percentage of polled votes of each party – formula (number of votes per party across all the constituencies / total number of votes all the constituencies")
    voteShareByParty(results.toList).foreach { case (party, pct) => println(party + " -->> " + pct.toString + "%") }
    println(EndBanner)
  }

  // Exercise 1 – Get all the distinct constituencies
  def distConst: Unit = {
    println("^^^^^^^^^^ Exercise 1:: Get all the distinct constituencies")
    distinctConstituencies(results.toList).foreach(name => println(name))
    println(EndBanner)
  }

  // Exercise 2 – Get number of constituencies by state sorted in descending order by number of constituencies
  def getNConstByState: Unit = {
    println("^^^^^^^^^^ Exercise 2:: Get number of constituencies by state sorted in descending order by number of constituencies")
    constituencyCountByState(results.toList).foreach { case (state, count) => println(state + "\t" + count) }
    println(EndBanner)
  }

  // Exercise 3 – Get the number of seats for each party in each state – output should be state,bjp,inc,….
  def getSeatTally: Unit = {
    println("^^^^^^^^^^ Exercise 3:: Get the number of seats for each party in each state – output should be state,bjp,inc,….")
    seatTally(results.toList).foreach { case (state, seats) => println(state + " -->> " + seats.mkString(", ")) }
    println(EndBanner)
  }

  /** Runs the analysis selected by the first command-line argument
    * (case-insensitive): `notabystate`, `1`, `2`, `3`, `4` or `5`.
    *
    * A missing or unrecognised argument prints a notice instead of failing.
    */
  def main(args: Array[String]): Unit =
    args.headOption.map(_.toLowerCase) match {
      case Some("notabystate") => getNOTAByState
      case Some("1")           => distConst
      case Some("2")           => getNConstByState
      case Some("3")           => getSeatTally
      case Some("4")           => voteShare
      case Some("5")           => top10ByMargin
      case _                   => println("==>>>>>>> nothing to process for now....")
    }
}


Exercises - Scala Collections
#2

Hello,

How can we extract (fetch) the email ID, From, Subject and Cc fields from a text file? Below is a data example.

From: Devendra Shukl
Sent: Friday Januar 12, 2018 06:07 PM
To: Devendra Shukla < Devendra Shukla@gmail.com>;Hari Kumar <Hari Kumar@gmail.com>; Ankit Singh aravind.singh123@gmail.com;
Subject: Solution

Hi all

What’s the Solution to this problem?

If I want to extract the From, Sent, To and body parts of the email, how can we fetch them using Spark and Scala?