# -*- coding: cp1252 -*-
"""Print how many times each rating value occurs in a ratings data file.

The script reads a text file through Spark, takes the third
whitespace-separated field of every line (the original code names this
field the rating), counts how often each distinct value occurs, and
prints one "<value> <count>" line per value in ascending key order.

NOTE(review): the input path points at "ml-100k/u.data", presumably the
MovieLens 100k ratings file -- confirm against the actual data set.
"""
import collections

# Location of the input file, kept exactly as in the original script.
RATINGS_PATH = "file:///D:/dumps/BigData/ml-100k/u.data"


def extract_rating(line):
    """Return the third whitespace-separated field of *line*.

    Raises:
        IndexError: if *line* has fewer than three fields.
    """
    return line.split()[2]


def sort_rating_counts(rating_counts):
    """Return *rating_counts* as an ``OrderedDict`` sorted by key.

    Keys are compared as-is; when they are strings (as produced by
    ``extract_rating``) the order is lexicographic, not numeric.
    """
    return collections.OrderedDict(sorted(rating_counts.items()))


def format_rating_counts(rating_counts):
    """Return one "<rating> <count>" string per item of *rating_counts*.

    The items are emitted in the mapping's own iteration order.
    """
    return ["%s %i" % (rating, count)
            for rating, count in rating_counts.items()]


def main():
    """Run the Spark job and print the rating histogram."""
    # Imported here rather than at module level so that the pure helpers
    # above stay importable on machines without a Spark installation.
    from pyspark import SparkConf, SparkContext

    spark_conf = SparkConf().setMaster("local").setAppName("RatingsRDDApp")
    spark_context = SparkContext(conf=spark_conf)
    try:
        lines_rdd = spark_context.textFile(RATINGS_PATH)
        ratings_rdd = lines_rdd.map(extract_rating)
        # countByValue() is an action: it returns a plain dict of
        # value -> count to the driver, not another RDD.
        rating_counts = ratings_rdd.countByValue()
    finally:
        # Always release the context, even when the job fails.
        spark_context.stop()

    for output_line in format_rating_counts(sort_rating_counts(rating_counts)):
        print(output_line)


if __name__ == "__main__":
    main()