|
||||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | |||||||||
java.lang.Object
  org.apache.mahout.vectorizer.encoders.FeatureVectorEncoder
public abstract class FeatureVectorEncoder
General interface for objects that record features into a feature vector.
By convention, sub-classes should provide a constructor that accepts just a field name as well as setters to customize properties of the conversion such as adding tokenizers or a weight dictionary.
| Field Summary | |
|---|---|
protected static int |
CONTINUOUS_VALUE_HASH_SEED
|
protected static int |
WORD_LIKE_VALUE_HASH_SEED
|
| Constructor Summary | |
|---|---|
protected |
FeatureVectorEncoder(String name)
|
protected |
FeatureVectorEncoder(String name,
int probes)
|
| Method Summary | |
|---|---|
abstract void |
addToVector(byte[] originalForm,
double weight,
Vector data)
|
void |
addToVector(byte[] originalForm,
Vector data)
Adds a value expressed in byte array form to a vector. |
void |
addToVector(String originalForm,
double weight,
Vector data)
Adds a weighted value expressed in string form to a vector. |
void |
addToVector(String originalForm,
Vector data)
Adds a value expressed in string form to a vector. |
abstract String |
asString(String originalForm)
Converts a value into a form that would help a human understand the internals of how the value is being interpreted. |
protected static byte[] |
bytesForString(String x)
|
String |
getName()
|
int |
getProbes()
|
protected double |
getWeight(byte[] originalForm,
double w)
|
protected int |
hash(byte[] term1,
byte[] term2,
int probe,
int numFeatures)
Hash two byte arrays and an integer into the range [0..numFeatures-1]. |
protected static int |
hash(byte[] term,
int probe,
int numFeatures)
Hash a byte array and an integer into the range [0..numFeatures-1]. |
protected int |
hash(String term,
int probe,
int numFeatures)
Hash a string and an integer into the range [0..numFeatures-1]. |
protected static int |
hash(String term1,
String term2,
int probe,
int numFeatures)
Hash two strings and an integer into the range [0..numFeatures-1]. |
protected int |
hash(String term1,
String term2,
String term3,
String term4,
int probe,
int numFeatures)
Hash four strings and an integer into the range [0..numFeatures-1]. |
protected Iterable<Integer> |
hashesForProbe(byte[] originalForm,
int dataSize,
String name,
int probe)
Returns all of the hashes for this probe. |
protected abstract int |
hashForProbe(byte[] originalForm,
int dataSize,
String name,
int probe)
Provides the unique hash for a particular probe. |
protected boolean |
isTraceEnabled()
|
void |
setProbes(int probes)
Sets the number of locations in the feature vector that a value should be in. |
void |
setTraceDictionary(Map<String,Set<Integer>> traceDictionary)
|
protected void |
trace(byte[] subName,
int n)
|
protected void |
trace(String subName,
int n)
|
| Methods inherited from class java.lang.Object |
|---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
| Field Detail |
|---|
protected static final int CONTINUOUS_VALUE_HASH_SEED
protected static final int WORD_LIKE_VALUE_HASH_SEED
| Constructor Detail |
|---|
protected FeatureVectorEncoder(String name)
protected FeatureVectorEncoder(String name,
int probes)
| Method Detail |
|---|
public void addToVector(String originalForm,
Vector data)
originalForm - The original form of the value as a string.
data - The vector to which the value should be added.
public void addToVector(byte[] originalForm,
Vector data)
originalForm - The original form of the value as a byte array.
data - The vector to which the value should be added.
public void addToVector(String originalForm,
double weight,
Vector data)
originalForm - The original form of the value as a string.
weight - The weight to be applied to this feature.
data - The vector to which the value should be added.
public abstract void addToVector(byte[] originalForm,
double weight,
Vector data)
protected abstract int hashForProbe(byte[] originalForm,
int dataSize,
String name,
int probe)
originalForm - The original byte array value.
dataSize - The length of the vector being encoded.
name - The name of the variable being encoded.
probe - The probe number.
protected Iterable<Integer> hashesForProbe(byte[] originalForm,
int dataSize,
String name,
int probe)
originalForm - The original byte array value.
dataSize - The length of the vector being encoded.
name - The name of the variable being encoded.
probe - The probe number.
protected double getWeight(byte[] originalForm,
double w)
protected int hash(String term,
int probe,
int numFeatures)
term - The string.
probe - An integer that modifies the resulting hash.
numFeatures - The range into which the resulting hash must fit.
protected static int hash(byte[] term,
int probe,
int numFeatures)
term - The bytes.
probe - An integer that modifies the resulting hash.
numFeatures - The range into which the resulting hash must fit.
protected static int hash(String term1,
String term2,
int probe,
int numFeatures)
term1 - The first string.
term2 - The second string.
probe - An integer that modifies the resulting hash.
numFeatures - The range into which the resulting hash must fit.
protected int hash(byte[] term1,
byte[] term2,
int probe,
int numFeatures)
term1 - The first byte array.
term2 - The second byte array.
probe - An integer that modifies the resulting hash.
numFeatures - The range into which the resulting hash must fit.
protected int hash(String term1,
String term2,
String term3,
String term4,
int probe,
int numFeatures)
term1 - The first string.
term2 - The second string.
term3 - The third string.
term4 - The fourth string.
probe - An integer that modifies the resulting hash.
numFeatures - The range into which the resulting hash must fit.
public abstract String asString(String originalForm)
originalForm - The original form of the value as a string.
public int getProbes()
public void setProbes(int probes)
probes - Number of locations to increment.
public String getName()
protected boolean isTraceEnabled()
protected void trace(String subName,
int n)
protected void trace(byte[] subName,
int n)
public void setTraceDictionary(Map<String,Set<Integer>> traceDictionary)
protected static byte[] bytesForString(String x)
|
||||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | |||||||||