public class DataSet extends Object implements javax.visrec.ml.data.DataSet<DataSetRow>, Serializable
http://openforecast.sourceforge.net/docs/net/sourceforge/openforecast/DataSet.html
,
Serialized Form
Constructor and Description |
---|
DataSet(int inputSize)
Creates an instance of new empty training set
|
DataSet(int inputSize,
int outputSize)
Creates an instance of new empty training set
|
Modifier and Type | Method and Description |
---|---|
DataSet |
add(DataSetRow row)
Adds a new row to this data set
|
void |
add(double[] input,
double[] output)
Adds a new dataset row with specified input and output
|
void |
add(int index,
DataSetRow row) |
void |
addRow(double[] input)
Adds a new dataset row with specified input
|
void |
clear()
Removes all elements from training set
|
static DataSet |
createFromFile(String filePath,
int inputsCount,
int outputsCount,
String delimiter)
Creates and returns data set from specified csv file
|
static DataSet |
createFromFile(String filePath,
int inputsCount,
int outputsCount,
String delimiter,
boolean loadColumnNames)
Creates and returns data set from specified csv file
|
DataSet[] |
createTrainingAndTestSubsets(double trainSetPercent,
double testSetPercent)
Returns training and test subsets in the specified percent ratio
|
DataSetRow |
get(int index) |
String |
getColumnName(int idx) |
String[] |
getColumnNames() |
DataSetColumnType |
getColumnType(int index) |
DataSetColumnType[] |
getColumnTypes() |
String |
getFilePath()
Returns full file path for this training set
|
int |
getInputSize()
Returns input vector size of training elements in this training set. This
method is an implementation of the EngineIndexableSet interface, and it is added
to provide compatibility with Encog data sets and FlatNetwork.
|
List<DataSetRow> |
getItems() |
String |
getLabel()
Returns label for this training set
|
int |
getOutputSize()
Returns output vector size of training elements in this training set.
|
DataSetRow |
getRowAt(int idx)
Returns training row at specified index position
|
List<DataSetRow> |
getRows()
Returns elements of this training set
|
int |
indexOf(Object row) |
boolean |
isEmpty()
Returns true if training set is empty, false otherwise
|
boolean |
isSupervised()
Returns true if data set is supervised, false otherwise
|
Iterator<DataSetRow> |
iterator()
Returns Iterator for iterating training elements collection
|
static DataSet |
load(String filePath)
Loads training set from the specified file
TODO: throw checked exceptions here
|
DataSetRow |
remove(int index) |
boolean |
remove(Object row) |
void |
removeRowAt(int idx)
Removes training row at specified index position
|
DataSet[] |
sample(Sampling sampling) |
void |
save()
Saves this training set to file specified in its filePath field
|
void |
save(String filePath)
Saves this training set to the specified file
|
void |
saveAsTxt(String filePath,
String delimiter) |
void |
setColumnName(int idx,
String columnName) |
void |
setColumnNames(String[] columnNames) |
void |
setColumnType(int index,
DataSetColumnType columnType)
Sets column type for the given index.
|
void |
setFilePath(String filePath)
Sets full file path for this training set
|
void |
setLabel(String label)
Sets label for this training set
|
void |
shuffle() |
int |
size()
Returns number of training elements in this training set
|
DataSet[] |
split(double... parts)
Splits data sets into parts of specified sizes.
|
DataSet[] |
split(int numParts)
Splits data set into specified number of parts and returns them as a list.
|
javax.visrec.ml.data.DataSet<DataSetRow>[] |
split(int numParts,
Random rnd) |
javax.visrec.ml.data.DataSet<DataSetRow>[] |
split(Random rnd,
double... parts) |
String |
toCSV()
Returns entire dataset in csv format
|
String |
toString()
Returns string representation of this data set
|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait
forEach, spliterator
public DataSet(int inputSize)
inputSize
-
public DataSet(int inputSize, int outputSize)
inputSize
- Length of the input vector
outputSize
- Length of the output vector
public DataSet add(DataSetRow row) throws VectorSizeMismatchException
add
in interface javax.visrec.ml.data.DataSet<DataSetRow>
row
- data set row to add
VectorSizeMismatchException
public void addRow(double[] input)
input
-
public void add(double[] input, double[] output)
input
-
output
-
public void removeRowAt(int idx)
idx
- position of row to remove
public Iterator<DataSetRow> iterator()
iterator
in interface Iterable<DataSetRow>
public List<DataSetRow> getRows()
public DataSetRow getRowAt(int idx)
idx
- index position of training row to return
public void clear()
clear
in interface javax.visrec.ml.data.DataSet<DataSetRow>
public boolean isEmpty()
isEmpty
in interface javax.visrec.ml.data.DataSet<DataSetRow>
public boolean isSupervised()
public int size()
size
in interface javax.visrec.ml.data.DataSet<DataSetRow>
public String getLabel()
public void setLabel(String label)
label
- label for this training set
public String[] getColumnNames()
public void setColumnNames(String[] columnNames)
public String getColumnName(int idx)
public void setColumnName(int idx, String columnName)
public DataSetColumnType[] getColumnTypes()
public DataSetColumnType getColumnType(int index)
public void setColumnType(int index, DataSetColumnType columnType)
index
- Index of the column in the row.
columnType
- Column type to set, nominal or numeric.
public void setFilePath(String filePath)
filePath
-
public String getFilePath()
public String toString()
public String toCSV()
public void save(String filePath)
filePath
-
public void save()
public static DataSet load(String filePath)
filePath
- training set file
public static DataSet createFromFile(String filePath, int inputsCount, int outputsCount, String delimiter, boolean loadColumnNames)
filePath
- path to csv dataset file to import
inputsCount
- number of inputs
outputsCount
- number of outputs
delimiter
- delimiter of values
loadColumnNames
- true if csv file contains column names in first line, false otherwise
public static DataSet createFromFile(String filePath, int inputsCount, int outputsCount, String delimiter)
filePath
- path to csv dataset file to import
inputsCount
- number of inputs
outputsCount
- number of outputs
delimiter
- delimiter of values
public DataSet[] createTrainingAndTestSubsets(double trainSetPercent, double testSetPercent)
trainSetPercent
-
testSetPercent
-
public DataSet[] split(int numParts)
split
in interface javax.visrec.ml.data.DataSet<DataSetRow>
numParts
-
public DataSet[] split(double... parts)
split
in interface javax.visrec.ml.data.DataSet<DataSetRow>
parts
-
public int getOutputSize()
public int getInputSize()
public void shuffle()
shuffle
in interface javax.visrec.ml.data.DataSet<DataSetRow>
public boolean remove(Object row)
public DataSetRow get(int index)
get
in interface javax.visrec.ml.data.DataSet<DataSetRow>
public void add(int index, DataSetRow row)
public DataSetRow remove(int index)
public int indexOf(Object row)
public List<DataSetRow> getItems()
getItems
in interface javax.visrec.ml.data.DataSet<DataSetRow>
public javax.visrec.ml.data.DataSet<DataSetRow>[] split(int numParts, Random rnd)
split
in interface javax.visrec.ml.data.DataSet<DataSetRow>
public javax.visrec.ml.data.DataSet<DataSetRow>[] split(Random rnd, double... parts)
split
in interface javax.visrec.ml.data.DataSet<DataSetRow>
Copyright © 2019 Neuroph Project. All rights reserved.