from operator import add

# Sample input. 'Hi' and 'hi' differ in case and no normalisation is applied
# below, so they are used as distinct keys.
s = 'Hi hi hi bye bye bye word count'

# Whitespace tokenisation.
seq = s.split()  # ['Hi', 'hi', 'hi', 'bye', 'bye', 'bye', 'word', 'count']

# Word count: pair every word with 1, then sum the 1s per word with `add`.
# NOTE(review): `sc` is not defined in this snippet -- presumably the
# SparkContext supplied by the PySpark shell; confirm before running this as
# a standalone script.
# The result is left as a bare expression (not bound to a name), so it is only
# shown when evaluated interactively.
(
    sc.parallelize(seq)
    .map(lambda word: (word, 1))
    .reduceByKey(add)
    .collect()
)