DaFi4
0
Q:

Normalize a vector to have unit norm using the given p-norm

# Normalize a vector to have unit norm using the given p-norm

from pyspark.ml.feature import Normalizer
from pyspark.ml.linalg import Vectors

# NOTE(review): `spark` and `temp_path` are not defined in this snippet.
# Presumably `spark` is an active SparkSession and `temp_path` is a writable
# directory path supplied by the surrounding environment -- confirm before
# running this as a standalone script.

# Build a one-row DataFrame holding one dense and one sparse vector column.
svec = Vectors.sparse(4, {1: 4.0, 3: 3.0})
df = spark.createDataFrame(
    [(Vectors.dense([3.0, -4.0]), svec)],
    ["dense", "sparse"],
)

# L2-normalize the dense column: [3, -4] has Euclidean norm 5.
normalizer = Normalizer(p=2.0, inputCol="dense", outputCol="features")
normalizer.transform(df).head().features
# DenseVector([0.6, -0.8])

# Re-point the same normalizer at the sparse column; the result stays sparse.
normalizer.setParams(inputCol="sparse", outputCol="freqs")
normalizer.transform(df).head().freqs
# SparseVector(4, {1: 0.8, 3: 0.6})

# Supply p, inputCol and outputCol through a param map passed to transform().
# With p=1 the norm of [3, -4] is 7.
params = {
    normalizer.p: 1.0,
    normalizer.inputCol: "dense",
    normalizer.outputCol: "vector",
}
normalizer.transform(df, params).head().vector
# DenseVector([0.4286, -0.5714])

# Round-trip the normalizer through save/load and compare the p parameter.
normalizerPath = temp_path + "/normalizer"
normalizer.save(normalizerPath)
loadedNormalizer = Normalizer.load(normalizerPath)
loadedNormalizer.getP() == normalizer.getP()
# True
0

New to Communities?

Join the community