Scala Exercises with Solutions

Ex. Task – Get the number of “None of the Above (NOTA)” votes for each state


import scala.io.Source
// Total NOTA ("None of the Above") votes per state, highest total first.
val fileName = "E:/hadoop/datasets/vote.tsv"
// Read every line up front so the file handle can be closed immediately.
val source = Source.fromFile(fileName)
val results = try source.getLines().toList finally source.close()
// Keep only the rows whose column 2 is the literal NOTA label.
val notas = results.filter(rec => rec.split("\t")(2) == "None of the Above")
// Build (state, votes) pairs: column 0 is read as the state and column 10 is
// parsed as the vote count. Each record is split only once.
val notabystate = notas.map { rec =>
  val cols = rec.split("\t")
  (cols(0), cols(10).toInt)
}
val i = notabystate.toList
val l = i.groupBy(r => r._1)
// A strict `map` is used instead of `mapValues`, which is lazy (and deprecated
// on Scala 2.13), so the per-state totals are computed exactly once.
val f = l.map { case (state, rows) => (state, rows.map(_._2).sum) }.toList
val notabyState = f.sortBy(r => -r._2)

  1. Get all the distinct constituencies :

import scala.io.Source
// Collect the distinct constituency names found in the vote file.
val fileName = "E:/hadoop/datasets/vote.tsv"
// Read every line up front so the file handle can be closed immediately.
val source = Source.fromFile(fileName)
val results = try source.getLines().toList finally source.close()
// Column 1 of each tab-separated record is read as the constituency.
// NOTE(review): a header row, if present, is not skipped - the sample result
// lists `constituency` as a value, which suggests the file has one.
val const = results.map(r => r.split("\t")(1))
val dist_const = const.toList
// Solution 1: distinct constituencies as a List (first-seen order is kept).
val dist_contL = dist_const.distinct
// Solution 2: distinct constituencies as a Set (no ordering guarantee).
val dist_contS = dist_const.toSet

Result Set

constituency
Adilabad
Peddapalle
Karimnagar
Nizamabad
Zahirabad
Medak
Malkajgiri
Secundrabad
Hyderabad


2 Get number of constituencies by state sorted in descending order by number of constituencies

import scala.io.Source
// Number of distinct constituencies per state, largest first.
val fileName = "E:/hadoop/datasets/vote.tsv"
// Read every line up front so the file handle can be closed immediately.
val source = Source.fromFile(fileName)
val results = try source.getLines().toList finally source.close()
// (state, constituency) for every record; each record is split only once.
// NOTE(review): a header row, if present, is not skipped and would show up
// as its own "state" entry with a count of 1.
val constbyState = results.map { r =>
  val cols = r.split("\t")
  (cols(0), cols(1))
}
val constbyStateL = constbyState.toList
val groupByState = constbyStateL.groupBy(r => r._1)
// The file has one row per candidate, so a constituency appears many times
// within its state; count distinct names rather than rows.
val f = groupByState.map { case (state, rows) => (state, rows.map(_._2).distinct.size) }
val l = f.toList
val ordNo_Of_Const_desc = l.sortBy(r => -r._2)

Result Set (note: the figures below count candidate rows per state rather than distinct constituencies):

(Uttar Pradesh,1368)
(Maharashtra,945)
(Tamil Nadu,884)
(Bihar,647)
(Andhra Pradesh,640)
(West Bengal,514)
(Karnataka,462)
(Madhya Pradesh,407)
(Gujarat,360)
(Rajasthan,345)
(Kerala,289)
(Punjab,266)
(Jharkhand,254)
(Haryana,240)
(Assam,176)
(NCT OF Delhi,157)
(Jammu & Kashmir,83)
(Uttarakhand,79)
(Himachal Pradesh,42)
(Puducherry,31)
(Tripura,27)
(Goa,21)
(Manipur,20)
(Chandigarh,18)
(Andaman & Nicobar Islands,16)
(Arunachal Pradesh,13)
(Dadra & Nagar Haveli,12)
(Meghalaya,12)
(Lakshadweep,7)
(Sikkim,7)
(Daman & Diu,5)
(Mizoram,4)
(Nagaland,4)
(state,1)


3 Get the number of seats for each party in each state – output should be state,bjp,inc,

import scala.io.Source
// Seats won by each party in each state. A seat goes to the candidate with
// the most votes in a constituency, so the winner of every constituency is
// determined first and only the winners are counted per (state, party).
val fileName = "E:/hadoop/datasets/vote.tsv"
// Read every line up front so the file handle can be closed immediately.
val source = Source.fromFile(fileName)
val results = try source.getLines().toList finally source.close()
// (state, constituency, party, votes) per candidate row. Rows whose vote
// column is missing or not an integer (e.g. a header line) are skipped
// instead of aborting the whole run.
val candidates = results.flatMap { r =>
  val cols = r.split("\t")
  scala.util.Try((cols(0), cols(1), cols(6), cols(10).toInt)).toOption
}
// One winner per (state, constituency): the row with the highest vote count.
val winners = candidates.groupBy(c => (c._1, c._2)).values.map(_.maxBy(_._4))
val state_party = winners.map(w => (w._1, w._3))
val state_party_List = state_party.toList
val groupBy_S_P = state_party_List.groupBy(r => (r._1, r._2))
// Strict `map` instead of the lazy/deprecated `mapValues`.
val f = groupBy_S_P.map { case (stateParty, seats) => (stateParty, seats.size) }
val l = f.toList
// Print the 25 (state, party) pairs with the most seats.
l.sortBy(r => -r._2).take(25).foreach(println)

Result set (note: the figures below count candidates per state and party, not seats won)

((Tamil Nadu,IND),517)
((Maharashtra,IND),444)
((Uttar Pradesh,IND),373)
((Andhra Pradesh,IND),233)
((Karnataka,IND),196)
((Bihar,IND),169)
((Gujarat,IND),158)
((Madhya Pradesh,IND),126)
((Kerala,IND),123)
((Haryana,IND),121)
((Punjab,IND),118)
((Rajasthan,IND),117)
((West Bengal,IND),83)
((Uttar Pradesh,BSP),80)
((Uttar Pradesh,NOTA),80)
((Uttar Pradesh,SP),78)
((Uttar Pradesh,BJP),78)
((Jharkhand,IND),77)
((Uttar Pradesh,AAAP),76)
((Uttar Pradesh,INC),67)
((NCT OF Delhi,IND),58)
((Assam,IND),58)
((Uttar Pradesh,BMUP),57)
((Maharashtra,BMUP),48)
((Maharashtra,AAAP),48)


4 Get the percentage of polled votes of each party – formula: (number of votes per party across all the constituencies / total number of votes across all the constituencies) × 100

import scala.io.Source
// Share of all polled votes obtained by each party, as a percentage.
val fileName = "E:/hadoop/datasets/vote.tsv"
// Read every line up front so the file handle can be closed immediately.
val source = Source.fromFile(fileName)
val results = try source.getLines().toList finally source.close()
// (party, votes) per candidate row. Rows whose vote column is missing or not
// an integer (e.g. a header line) are skipped instead of throwing.
val party_votes = results.flatMap { r =>
  val cols = r.split("\t")
  scala.util.Try((cols(6), cols(10).toInt)).toOption
}
val partyvotesL = party_votes.toList
val groupBy_party = partyvotesL.groupBy(r => r._1)
// Totals are accumulated as Long so large parties cannot overflow Int.
val tot_per_party = groupBy_party.map { case (party, rows) => (party, rows.map(_._2.toLong).sum) }
// `sum` (unlike `reduce`) is defined for an empty list.
val tot = partyvotesL.map(r => r._2.toLong).sum
// Multiply by 100.0 so the arithmetic is done in Double; multiplying an Int
// total by 100 first wraps around and yields negative percentages.
val i = tot_per_party.map { case (party, votes) => (party, votes * 100.0 / tot) }
i.take(25).foreach(println)

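Result Set (note: the negative BJP and BSP values are an Int-overflow artifact of multiplying the party total by 100 before converting to Double)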

(BKrD,8.172786411953772E-4)
(bjdi,2.1499235784857218E-4)
(RAPa,6.553613198103166E-4)
(JMM,0.2817226781500328)
(JVM,0.30379150907258906)
(MKUP,6.922830843066725E-5)
(ABHKP,1.317260868750196E-4)
(BhNP,3.846017135037069E-4)
(JMBP,0.011151142081326478)
(PRISM,5.180585080894932E-4)
(SP(I),0.007983946970623452)
(MAMAK,0.045513574475171924)
(MaJP,1.917239541815979E-4)
(BVLP,3.096043793704841E-5)
(grac,5.588262897208862E-4)
(PECP,0.09975106961774843)
(BSP(A),0.002135308713372581)
(IPFT,0.004101008071090027)
(BBM,0.06939253336233332)
(RADP,0.0073693534324445285)
(RsAD,0.00237799239459342)
(BEMP,0.001678594178586929)
(BJP,-2.0674132114119237)
(BSP,-3.9451751411205604)
(AKAKRP,3.0152774338690623E-4)


5 Get top 10 candidates by margin (number of votes for winner – number of votes for 1st runner)

import scala.io.Source
// Top 10 winning candidates by margin (winner's votes minus runner-up's votes).
val fileName = "E:/hadoop/datasets/vote.tsv"
// Read every line up front so the file handle can be closed immediately.
val source = Source.fromFile(fileName)
val results = try source.getLines().toList finally source.close()
// (state, constituency, candidate, votes) per row. Rows whose vote column is
// missing or not an integer (e.g. a header line) are skipped instead of throwing.
val resultset = results.flatMap { r =>
  val cols = r.split("\t")
  scala.util.Try((cols(0), cols(1), cols(2), cols(10).toInt)).toOption
}
val result1 = resultset.toList
val resultGroup = result1.groupBy(r => (r._1, r._2))
// Keep the two highest-polling candidates of every constituency.
val resultO = resultGroup.map { case (seat, rows) => (seat, rows.sortBy(r => -r._4).take(2)) }
// (winner name, margin). With a single candidate the margin is that
// candidate's own vote count, because `reduce` returns the lone element.
val resultM = resultO.map { case (seat, topTwo) =>
  (seat, (topTwo.map(_._3).take(1), topTwo.map(_._4).reduce((a, b) => a - b)))
}

// A Map has no meaningful order, so rank by margin before taking the top 10.
resultM.toList.sortBy(r => -r._2._2).take(10).foreach(println)

Result Set (note: 10 entries in map iteration order, not ranked by margin)

((Assam,Dibrugarh),(List(RAMESWAR TELI),185347))
((Andhra Pradesh,Anantapur),(List(J.C. DIVAKAR REDDI),61991))
((Rajasthan,Jodhpur),(List(GAJENDRASINGH SHEKHAWAT ),410051))
((Rajasthan,Barmer),(List(COL. SONA RAM),87461))
((Punjab,Anandpur Sahib),(List(PREM SINGH CHANDUMAJRA),23697))
((Punjab,Bathinda),(List(HARSIMRAT KAUR BADAL),19395))
((Tamil Nadu,Tiruppur),(List(V.SATHYABAMA),179315))
((Kerala,Mavelikkara ),(List(KODIKUNNIL SURESH),32737))
((West Bengal,Maldaha Uttar),(List(MAUSAM NOOR),65705))
((Maharashtra,Shirur),(List(ADHALRAO SHIVAJI DATTATREY),301814))