Q:

NGram: converts the input array of strings into an array of n-grams.

# NGram example: converts the input array of strings into an array of n-grams.
# NOTE(review): `spark`, `Row`, `NGram` and `temp_path` are not defined in this
# snippet; presumably they are supplied by the surrounding session -- confirm.
# The `# Row(...)` / `# True` comments show the expected result of the
# statement directly above them.

df = spark.createDataFrame([Row(inputTokens=[
  "a", "b", "c", "d", "e"])])
ngram = NGram(n=2, inputCol="inputTokens", outputCol="nGrams")
ngram.transform(df).head()
# Row(inputTokens=[u'a', u'b', u'c', u'd', u'e'], nGrams=[u'a b', u'b c', u'c d', u'd e'])

# Change the n-gram length. The new n stays set on `ngram`, which is why the
# later transform calls below also produce 4-grams.
ngram.setParams(n=4).transform(df).head()
# Row(inputTokens=[u'a', u'b', u'c', u'd', u'e'], nGrams=[u'a b c d', u'b c d e'])

# Temporarily modify the output column: the param map passed to transform()
# applies to that call only; the next plain transform() uses "nGrams" again.
ngram.transform(df, {ngram.outputCol: "output"}).head()
# Row(inputTokens=[u'a', u'b', u'c', u'd', u'e'], output=[u'a b c d', u'b c d e'])
ngram.transform(df).head()
# Row(inputTokens=[u'a', u'b', u'c', u'd', u'e'], nGrams=[u'a b c d', u'b c d e'])

# Must use keyword arguments to specify params.
# The call below is expected to fail with the TypeError shown underneath it.
# NOTE(review): if this is run as a plain script rather than an interactive or
# doctest session, the uncaught TypeError stops execution here and the
# save/load lines that follow never run.
ngram.setParams("text")
# Traceback (most recent call last):
#	...
# TypeError: Method setParams forces keyword arguments.

# Save the transformer, load it back, and check that the loaded copy reports
# the same n as the original.
ngramPath = temp_path + "/ngram"
ngram.save(ngramPath)
loadedNGram = NGram.load(ngramPath)
loadedNGram.getN() == ngram.getN()
# True
0

New to Communities?

Join the community