/**
 * Returns a new tensor which has the values of the original tensor
 * scaled by {@code scaling}.  The original object is unaffected.
 */
public SimpleTensor scale(double scaling) {
  SimpleTensor result = new SimpleTensor(numRows, numCols, numSlices);
  for (int slice = 0; slice < numSlices; ++slice) {
    result.slices[slice] = slices[slice].scale(scaling);
  }
  return result;
}
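A minimal usage sketch (the 2x2x3 dimensions are arbitrary, chosen only for illustration): because scale allocates a fresh result, the receiver keeps its original values.

SimpleTensor t = new SimpleTensor(2, 2, 3);   // illustrative tensor
SimpleTensor doubled = t.scale(2.0);          // every entry of every slice multiplied by 2
// t is unchanged and can still be used with its original values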
static SimpleMatrix randomWordVector(int size, Random rand) {
  return NeuralUtils.randomGaussian(size, 1, rand).scale(0.1);
}
/**
 * Creates a random context matrix.  This will be numRows x
 * 2*numCols big.  These can be appended to the end of either a
 * unary or binary transform matrix to get the transform matrix
 * which uses context words.
 */
private SimpleMatrix randomContextMatrix() {
  SimpleMatrix matrix = new SimpleMatrix(numRows, numCols * 2);
  matrix.insertIntoThis(0, 0, identity.scale(op.trainOptions.scalingForInit * 0.1));
  matrix.insertIntoThis(0, numCols, identity.scale(op.trainOptions.scalingForInit * 0.1));
  matrix = matrix.plus(SimpleMatrix.random(numRows, numCols * 2,
                                           -1.0 / Math.sqrt((double) numCols * 100.0),
                                           1.0 / Math.sqrt((double) numCols * 100.0), rand));
  return matrix;
}
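Spelled out, with s = op.trainOptions.scalingForInit and d = numCols, the initialization is two down-weighted identity blocks plus small uniform noise: M = 0.1 * s * [I  I] + U(-1/sqrt(100 * d), +1/sqrt(100 * d)) elementwise, so each context word initially passes through as a slightly perturbed, scaled copy of itself.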
public void addRandomUnaryMatrix(String childBasic) {
  if (unaryTransform.get(childBasic) != null) {
    return;
  }
  ++numUnaryMatrices;

  // scoring matrix
  SimpleMatrix score = SimpleMatrix.random(1, numCols,
                                           -1.0 / Math.sqrt((double) numCols),
                                           1.0 / Math.sqrt((double) numCols), rand);
  unaryScore.put(childBasic, score.scale(op.trainOptions.scalingForInit));

  SimpleMatrix transform;
  if (op.trainOptions.useContextWords) {
    transform = new SimpleMatrix(numRows, numCols * 3 + 1); // leave room for bias term
    transform.insertIntoThis(0, numCols + 1, randomContextMatrix());
  } else {
    transform = new SimpleMatrix(numRows, numCols + 1);
  }
  // use a single numHid x numHid block here; the full randomTransformMatrix()
  // is numHid x (numHid * 2 + 1) and would be too wide to insert into a unary transform
  SimpleMatrix unary = randomTransformBlock();
  transform.insertIntoThis(0, 0, unary);
  unaryTransform.put(childBasic, transform.scale(op.trainOptions.scalingForInit));
}
private static double scaleAndRegularize(Map<String, SimpleMatrix> derivatives,
                                         Map<String, SimpleMatrix> currentMatrices,
                                         double scale, double regCost,
                                         boolean activeMatricesOnly, boolean dropBiasColumn) {
  double cost = 0.0; // the regularization cost
  for (Map.Entry<String, SimpleMatrix> entry : currentMatrices.entrySet()) {
    SimpleMatrix D = derivatives.get(entry.getKey());
    if (activeMatricesOnly && D == null) {
      // Fill in an empty matrix so the length of theta can match.
      // TODO: might want to allow for sparse parameter vectors
      derivatives.put(entry.getKey(),
                      new SimpleMatrix(entry.getValue().numRows(), entry.getValue().numCols()));
      continue;
    }
    SimpleMatrix regMatrix = entry.getValue();
    if (dropBiasColumn) {
      regMatrix = new SimpleMatrix(regMatrix);
      regMatrix.insertIntoThis(0, regMatrix.numCols() - 1,
                               new SimpleMatrix(regMatrix.numRows(), 1));
    }
    D = D.scale(scale).plus(regMatrix.scale(regCost));
    derivatives.put(entry.getKey(), D);
    cost += regMatrix.elementMult(regMatrix).elementSum() * regCost / 2.0;
  }
  return cost;
}
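The loop implements plain L2 regularization. For each parameter matrix W (with the bias column zeroed out first when dropBiasColumn is set), the penalty added to the returned cost is (regCost / 2) * sum_ij W_ij^2, and the term regMatrix.scale(regCost) added to the scaled derivative is exactly its gradient, regCost * W.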
/**
 * Returns matrices of the right size for either binary or unary (terminal) classification
 */
SimpleMatrix randomClassificationMatrix() {
  SimpleMatrix score = new SimpleMatrix(numClasses, numHid + 1);
  double range = 1.0 / (Math.sqrt((double) numHid));
  score.insertIntoThis(0, 0, SimpleMatrix.random(numClasses, numHid, -range, range, rand));
  // bias column goes from 0 to 1 initially
  score.insertIntoThis(0, numHid, SimpleMatrix.random(numClasses, 1, 0.0, 1.0, rand));
  return score.scale(op.trainOptions.scalingForInit);
}
SimpleMatrix randomTransformMatrix() {
  SimpleMatrix binary = new SimpleMatrix(numHid, numHid * 2 + 1);
  // bias column values are initialized zero
  binary.insertIntoThis(0, 0, randomTransformBlock());
  binary.insertIntoThis(0, numHid, randomTransformBlock());
  return binary.scale(op.trainOptions.scalingForInit);
}
/**
 * Applies softmax to all of the elements of the matrix.  The return
 * matrix will have all of its elements sum to 1.  If your matrix is
 * not already a vector, be sure this is what you actually want.
 */
public static SimpleMatrix softmax(SimpleMatrix input) {
  SimpleMatrix output = new SimpleMatrix(input);
  for (int i = 0; i < output.numRows(); ++i) {
    for (int j = 0; j < output.numCols(); ++j) {
      output.set(i, j, Math.exp(output.get(i, j)));
    }
  }
  double sum = output.elementSum();
  // will be safe, since exp should never return 0
  return output.scale(1.0 / sum);
}
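A minimal usage sketch (the scores are arbitrary). One caveat: this method exponentiates its inputs directly, so very large entries can overflow Math.exp to infinity; callers expecting large scores may want to subtract the maximum element first, which leaves the result mathematically unchanged.

SimpleMatrix scores = new SimpleMatrix(3, 1);
scores.set(0, 0, 1.0);
scores.set(1, 0, 2.0);
scores.set(2, 0, 3.0);
SimpleMatrix probs = softmax(scores);
// probs is roughly [0.090, 0.245, 0.665]^T and sums to 1.0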
public void addRandomBinaryMatrix(String leftBasic, String rightBasic) {
  if (binaryTransform.get(leftBasic, rightBasic) != null) {
    return;
  }
  ++numBinaryMatrices;

  // scoring matrix
  SimpleMatrix score = SimpleMatrix.random(1, numCols,
                                           -1.0 / Math.sqrt((double) numCols),
                                           1.0 / Math.sqrt((double) numCols), rand);
  binaryScore.put(leftBasic, rightBasic, score.scale(op.trainOptions.scalingForInit));

  SimpleMatrix binary;
  if (op.trainOptions.useContextWords) {
    binary = new SimpleMatrix(numRows, numCols * 4 + 1); // leave room for bias term
    binary.insertIntoThis(0, numCols * 2 + 1, randomContextMatrix());
  } else {
    binary = new SimpleMatrix(numRows, numCols * 2 + 1);
  }
  // each child gets a single numHid x numHid block; the full randomTransformMatrix()
  // would be too wide to insert at these offsets
  SimpleMatrix left = randomTransformBlock();
  SimpleMatrix right = randomTransformBlock();
  binary.insertIntoThis(0, 0, left);
  binary.insertIntoThis(0, numCols, right);
  binaryTransform.put(leftBasic, rightBasic, binary.scale(op.trainOptions.scalingForInit));
}
private static SimpleTensor getTensorGradient(SimpleMatrix deltaFull, SimpleMatrix leftVector,
                                              SimpleMatrix rightVector) {
  int size = deltaFull.getNumElements();
  SimpleTensor Wt_df = new SimpleTensor(size * 2, size * 2, size);
  // TODO: combine this concatenation with computeTensorDeltaDown?
  SimpleMatrix fullVector = NeuralUtils.concatenate(leftVector, rightVector);
  for (int slice = 0; slice < size; ++slice) {
    Wt_df.setSlice(slice, fullVector.scale(deltaFull.get(slice)).mult(fullVector.transpose()));
  }
  return Wt_df;
}
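In matrix form: with f = [a; b] the concatenation of the two child vectors, slice k of the returned gradient tensor is delta_k * f f^T, the outer product of f with itself scaled by the k-th entry of deltaFull.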
private static double scaleAndRegularize(TwoDimensionalMap<String, String, SimpleMatrix> derivatives,
                                         TwoDimensionalMap<String, String, SimpleMatrix> currentMatrices,
                                         double scale, double regCost, boolean dropBiasColumn) {
  double cost = 0.0; // the regularization cost
  for (TwoDimensionalMap.Entry<String, String, SimpleMatrix> entry : currentMatrices) {
    SimpleMatrix D = derivatives.get(entry.getFirstKey(), entry.getSecondKey());
    SimpleMatrix regMatrix = entry.getValue();
    if (dropBiasColumn) {
      regMatrix = new SimpleMatrix(regMatrix);
      regMatrix.insertIntoThis(0, regMatrix.numCols() - 1,
                               new SimpleMatrix(regMatrix.numRows(), 1));
    }
    D = D.scale(scale).plus(regMatrix.scale(regCost));
    derivatives.put(entry.getFirstKey(), entry.getSecondKey(), D);
    cost += regMatrix.elementMult(regMatrix).elementSum() * regCost / 2.0;
  }
  return cost;
}
private static SimpleMatrix computeTensorDeltaDown(SimpleMatrix deltaFull, SimpleMatrix leftVector,
                                                   SimpleMatrix rightVector, SimpleMatrix W,
                                                   SimpleTensor Wt) {
  SimpleMatrix WTDelta = W.transpose().mult(deltaFull);
  SimpleMatrix WTDeltaNoBias = WTDelta.extractMatrix(0, deltaFull.numRows() * 2, 0, 1);
  int size = deltaFull.getNumElements();
  SimpleMatrix deltaTensor = new SimpleMatrix(size * 2, 1);
  SimpleMatrix fullVector = NeuralUtils.concatenate(leftVector, rightVector);
  for (int slice = 0; slice < size; ++slice) {
    SimpleMatrix scaledFullVector = fullVector.scale(deltaFull.get(slice));
    deltaTensor = deltaTensor.plus(Wt.getSlice(slice).plus(Wt.getSlice(slice).transpose())
                                     .mult(scaledFullVector));
  }
  return deltaTensor.plus(WTDeltaNoBias);
}
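This is the usual backpropagation rule for a tensor layer: the message passed down to the children is delta_down = W^T delta (with the bias row dropped) + sum_k delta_k (V_k + V_k^T) f, where f = [a; b] and V_k = Wt.getSlice(k); the symmetrized (V_k + V_k^T) term is the derivative of the bilinear form f^T V_k f with respect to f.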
SimpleMatrix deltaClass = goldClass >= 0
    ? predictions.minus(goldLabel).scale(nodeWeight)
    : new SimpleMatrix(predictions.numRows(), predictions.numCols());
SimpleMatrix localCD = deltaClass.mult(NeuralUtils.concatenateWithBias(currentVector).transpose());
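Here predictions.minus(goldLabel) is the standard softmax-with-cross-entropy error signal, nodeWeight lets individual nodes count more or less toward the objective, and nodes with no gold class (goldClass < 0) contribute a zero delta. The outer product deltaClass * [v; 1]^T (formed via concatenateWithBias) is then the gradient with respect to the classification matrix, bias column included.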