CCA-175 Spark and Hadoop Developer-Python


Exercises: “Get those order items where order item subtotal is not equal to order item quantity multiplied by order item product price”:
My solution is:

def readData(dataPath):
    """Read a text file and return its contents as a list of lines.

    Args:
        dataPath: Path of the file to read.

    Returns:
        A list with one string per line, without line terminators.
    """
    # The context manager closes the file even if reading fails.
    with open(dataPath) as dataFile:
        dataStr = dataFile.read()
    return dataStr.splitlines()

# Path of the order_items data file to load.
orderItemsPath = "/data/retail_db/order_items/part-00000"
# Load the file; readData returns one list element per line.
orderItems = readData(orderItemsPath)

def OrdersWithWrongSubtotals(orderItems):
    """Count order items whose stored subtotal disagrees with quantity * price.

    Each element of ``orderItems`` is one comma-separated record. Field 3 is
    read as the quantity, field 4 as the stored subtotal and field 5 as the
    unit price (zero-based indexes).

    Args:
        orderItems: Iterable of comma-separated order-item lines.

    Returns:
        The number of records whose stored subtotal differs from
        quantity * unit price after both are rounded to two decimals.

    Raises:
        ValueError: If a numeric field cannot be parsed.
        IndexError: If a non-blank record has fewer than six fields.
    """
    wrongSubtotalCount = 0
    for orderItem in orderItems:
        if not orderItem.strip():
            # Tolerate blank lines, e.g. a stray empty line in the input.
            continue
        # Split once instead of re-splitting the line for every field.
        fields = orderItem.split(',')
        orderQty = int(fields[3])
        givenSubtotal = float(fields[4])
        unitPrice = float(fields[5])
        derivedSubtotal = orderQty * unitPrice
        # Compare at cent precision: exact float equality reports false
        # mismatches such as 3 * 0.1 != 0.3.
        # NOTE(review): assumes amounts are currency with two decimals.
        if round(derivedSubtotal, 2) != round(givenSubtotal, 2):
            wrongSubtotalCount += 1
    return wrongSubtotalCount

# Number of loaded order items whose stored subtotal does not match
# quantity * unit price.
ordersWithWrongSubtotal = OrdersWithWrongSubtotals(orderItems)

But I’m sure there’s a better way of solving this problem. Please show me. Thanks