/**
 * Creates an input {@link DataSet} from the given {@link InputFormat}. Nothing is read at call
 * time: the returned DataSet is created lazily from the input format once the program is executed.
 *
 * <p>Every data set requires information about its element type, so the type produced by the
 * input format has to be determined here. This is attempted via reflection, unless the input
 * format implements the {@link ResultTypeQueryable} interface, in which case
 * {@link ResultTypeQueryable#getProducedType()} is invoked to obtain the produced data type.
 *
 * @param inputFormat The input format used to create the data set.
 * @return A {@link DataSet} that represents the data created by the input format.
 * @throws IllegalArgumentException If {@code inputFormat} is {@code null}.
 * @throws InvalidProgramException If type extraction or the delegated call fails; the original
 *         exception is attached as the cause.
 *
 * @see #createInput(InputFormat, TypeInformation)
 */
public <X> DataSource<X> createInput(InputFormat<X, ?> inputFormat) {
    if (inputFormat == null) {
        throw new IllegalArgumentException("InputFormat must not be null.");
    }

    try {
        return createInput(inputFormat, TypeExtractor.getInputFormatTypes(inputFormat));
    } catch (Exception cause) {
        // Point the user at the overload that takes explicit type information.
        final String message =
                "The type returned by the input format could not be automatically determined. "
                        + "Please specify the TypeInformation of the produced type explicitly by using the "
                        + "'createInput(InputFormat, TypeInformation)' method instead.";
        throw new InvalidProgramException(message, cause);
    }
}
/**
 * Creates a {@link DataSource} that reads the file at the given path with the given file input format.
 *
 * <p>The path is set on {@code inputFormat} before the produced type is determined through
 * {@link TypeExtractor#getInputFormatTypes}. The input format and the extracted type are then
 * handed to {@code createInput(InputFormat, TypeInformation)}.
 *
 * @param inputFormat The file input format used to read the file; its file path is overwritten.
 * @param filePath The path of the file to read.
 * @return The data source that represents the data read by the input format.
 * @throws IllegalArgumentException If {@code inputFormat} or {@code filePath} is {@code null}.
 * @throws InvalidProgramException If type extraction or the delegated call fails; the original
 *         exception is attached as the cause.
 */
public <X> DataSource<X> readFile(FileInputFormat<X> inputFormat, String filePath) {
    if (inputFormat == null) {
        throw new IllegalArgumentException("InputFormat must not be null.");
    }
    if (filePath == null) {
        throw new IllegalArgumentException("The file path must not be null.");
    }

    inputFormat.setFilePath(new Path(filePath));
    try {
        return createInput(inputFormat, TypeExtractor.getInputFormatTypes(inputFormat));
    } catch (Exception e) {
        // Chain the original exception so the root cause of the failure is not lost.
        throw new InvalidProgramException(
                "The type returned by the input format could not be automatically determined. "
                        + "Please specify the TypeInformation of the produced type explicitly by using the "
                        + "'createInput(InputFormat, TypeInformation)' method instead.",
                e);
    }
}
/**
 * Creates an input data stream from the given {@link org.apache.flink.api.common.io.InputFormat}.
 *
 * <p>Every data stream requires information about its element type, so the type produced by the
 * input format has to be determined here. This is attempted via reflection, unless the input format
 * implements the {@link org.apache.flink.api.java.typeutils.ResultTypeQueryable} interface, in which
 * case {@link org.apache.flink.api.java.typeutils.ResultTypeQueryable#getProducedType()} is invoked
 * to obtain the produced data type.
 *
 * <p><b>NOTES ON CHECKPOINTING: </b> For a {@link FileInputFormat}, the source (which executes the
 * {@link ContinuousFileMonitoringFunction}) monitors the path, creates the
 * {@link org.apache.flink.core.fs.FileInputSplit FileInputSplits} to be processed, forwards them to
 * the downstream {@link ContinuousFileReaderOperator} that reads the actual data, and then exits
 * without waiting for the readers to finish. Consequently, no further checkpoint barriers are
 * forwarded once the source has exited, so no checkpoints are taken after that point.
 *
 * @param inputFormat
 * 		The input format used to create the data stream
 * @param <OUT>
 * 		The type of the returned data stream
 * @return The data stream that represents the data created by the input format
 */
@PublicEvolving
public <OUT> DataStreamSource<OUT> createInput(InputFormat<OUT, ?> inputFormat) {
    // Extract the produced type, then delegate to the overload taking explicit type information.
    return createInput(
            inputFormat,
            TypeExtractor.getInputFormatTypes(inputFormat));
}
typeInformation = TypeExtractor.getInputFormatTypes(inputFormat); } catch (Exception e) { throw new InvalidProgramException("The type returned by the input format could not be " +
typeInformation = TypeExtractor.getInputFormatTypes(inputFormat); } catch (Exception e) { throw new InvalidProgramException("The type returned by the input format could not be " +
/**
 * Checks that the type extracted from a {@code DummyFloatInputFormat} equals
 * {@code BasicTypeInfo.FLOAT_TYPE_INFO}.
 */
@Test
public void testExtractInputFormatType() {
    try {
        InputFormat<?, ?> floatFormat = new DummyFloatInputFormat();
        assertEquals(BasicTypeInfo.FLOAT_TYPE_INFO, TypeExtractor.getInputFormatTypes(floatFormat));
    } catch (Exception ex) {
        // Any exception during extraction is reported as a test failure.
        ex.printStackTrace();
        fail(ex.getMessage());
    }
}
/**
 * Checks that the type extracted from a {@code QueryableInputFormat} equals
 * {@code BasicTypeInfo.DOUBLE_TYPE_INFO}.
 */
@Test
public void testQueryableFormatType() {
    try {
        InputFormat<?, ?> queryableFormat = new QueryableInputFormat();
        assertEquals(BasicTypeInfo.DOUBLE_TYPE_INFO, TypeExtractor.getInputFormatTypes(queryableFormat));
    } catch (Exception ex) {
        // Any exception during extraction is reported as a test failure.
        ex.printStackTrace();
        fail(ex.getMessage());
    }
}
/**
 * Builds a test harness around a {@link ContinuousFileReaderOperator} that reads with the given format.
 *
 * <p>The operator's output type is taken from {@link TypeExtractor#getInputFormatTypes} using a
 * fresh {@link ExecutionConfig}, and the harness is switched to event time before it is returned.
 *
 * @param format The input format the reader operator is created with.
 * @param noOfTasks Passed to the harness constructor as its third argument.
 * @param taksIdx Passed to the harness constructor as its fourth argument.
 * @return The configured test harness.
 * @throws Exception If creating or configuring the harness fails.
 */
private OneInputStreamOperatorTestHarness<TimestampedFileInputSplit, String> getTestHarness(
        BlockingFileInputFormat format,
        int noOfTasks,
        int taksIdx) throws Exception {

    ContinuousFileReaderOperator<String> readerOperator = new ContinuousFileReaderOperator<>(format);
    readerOperator.setOutputType(TypeExtractor.getInputFormatTypes(format), new ExecutionConfig());

    // NOTE(review): the literal 10 is presumably the max parallelism — confirm against the harness constructor.
    OneInputStreamOperatorTestHarness<TimestampedFileInputSplit, String> harness =
            new OneInputStreamOperatorTestHarness<>(readerOperator, 10, noOfTasks, taksIdx);
    harness.setTimeCharacteristic(TimeCharacteristic.EventTime);
    return harness;
}
TypeInformation<?> typeInfo = TypeExtractor.getInputFormatTypes(format); assertEquals(BasicTypeInfo.SHORT_TYPE_INFO, typeInfo); TypeInformation<?> typeInfo = TypeExtractor.getInputFormatTypes(format);
/**
 * Checks that an {@code AvroInputFormat} for {@code MyAvroType} yields {@code PojoTypeInfo} for
 * that class, both when extracted directly and when obtained from a data set created via
 * {@code ExecutionEnvironment#createInput}.
 */
@Test
public void testTypeExtraction() {
    try {
        // The path is never opened; only type extraction is exercised.
        Path unusedPath = new Path("file:///ignore/this/file");
        InputFormat<MyAvroType, ?> avroFormat = new AvroInputFormat<MyAvroType>(unusedPath, MyAvroType.class);

        TypeInformation<?> directInfo = TypeExtractor.getInputFormatTypes(avroFormat);

        ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<MyAvroType> avroDataSet = environment.createInput(avroFormat);
        TypeInformation<?> dataSetInfo = avroDataSet.getType();

        Assert.assertTrue(directInfo instanceof PojoTypeInfo);
        Assert.assertTrue(dataSetInfo instanceof PojoTypeInfo);

        Assert.assertEquals(MyAvroType.class, directInfo.getTypeClass());
        Assert.assertEquals(MyAvroType.class, dataSetInfo.getTypeClass());
    } catch (Exception ex) {
        // Any exception is reported as a test failure.
        ex.printStackTrace();
        Assert.fail(ex.getMessage());
    }
}
@Test public void testMultiLevelDerivedInputFormatType() { try { // composite type { InputFormat<?, ?> format = new FinalRelativeInputFormat(); TypeInformation<?> typeInfo = TypeExtractor.getInputFormatTypes(format); assertTrue(typeInfo.isTupleType()); assertTrue(typeInfo instanceof TupleTypeInfo); @SuppressWarnings("unchecked") TupleTypeInfo<Tuple3<String, Integer, Double>> tupleInfo = (TupleTypeInfo<Tuple3<String, Integer, Double>>) typeInfo; assertEquals(3, tupleInfo.getArity()); assertEquals(BasicTypeInfo.STRING_TYPE_INFO, tupleInfo.getTypeAt(0)); assertEquals(BasicTypeInfo.INT_TYPE_INFO, tupleInfo.getTypeAt(1)); assertEquals(BasicTypeInfo.DOUBLE_TYPE_INFO, tupleInfo.getTypeAt(2)); } } catch (Exception e) { e.printStackTrace(); fail(e.getMessage()); } }
/**
 * Creates an input {@link DataSet} from the given {@link InputFormat}. Nothing is read at call
 * time: the returned DataSet is created lazily from the input format once the program is executed.
 *
 * <p>Every data set requires information about its element type, so the type produced by the
 * input format has to be determined here. This is attempted via reflection, unless the input
 * format implements the {@link ResultTypeQueryable} interface, in which case
 * {@link ResultTypeQueryable#getProducedType()} is invoked to obtain the produced data type.
 *
 * @param inputFormat The input format used to create the data set.
 * @return A {@link DataSet} that represents the data created by the input format.
 * @throws IllegalArgumentException If {@code inputFormat} is {@code null}.
 * @throws InvalidProgramException If type extraction or the delegated call fails; the original
 *         exception is attached as the cause.
 *
 * @see #createInput(InputFormat, TypeInformation)
 */
public <X> DataSource<X> createInput(InputFormat<X, ?> inputFormat) {
    if (inputFormat == null) {
        throw new IllegalArgumentException("InputFormat must not be null.");
    }

    try {
        return createInput(inputFormat, TypeExtractor.getInputFormatTypes(inputFormat));
    } catch (Exception cause) {
        // Point the user at the overload that takes explicit type information.
        final String message =
                "The type returned by the input format could not be automatically determined. "
                        + "Please specify the TypeInformation of the produced type explicitly by using the "
                        + "'createInput(InputFormat, TypeInformation)' method instead.";
        throw new InvalidProgramException(message, cause);
    }
}
/**
 * Creates an input {@link DataSet} from the given {@link InputFormat}. Nothing is read at call
 * time: the returned DataSet is created lazily from the input format once the program is executed.
 *
 * <p>Every data set requires information about its element type, so the type produced by the
 * input format has to be determined here. This is attempted via reflection, unless the input
 * format implements the {@link ResultTypeQueryable} interface, in which case
 * {@link ResultTypeQueryable#getProducedType()} is invoked to obtain the produced data type.
 *
 * @param inputFormat The input format used to create the data set.
 * @return A {@link DataSet} that represents the data created by the input format.
 * @throws IllegalArgumentException If {@code inputFormat} is {@code null}.
 * @throws InvalidProgramException If type extraction or the delegated call fails; the original
 *         exception is attached as the cause.
 *
 * @see #createInput(InputFormat, TypeInformation)
 */
public <X> DataSource<X> createInput(InputFormat<X, ?> inputFormat) {
    if (inputFormat == null) {
        throw new IllegalArgumentException("InputFormat must not be null.");
    }

    try {
        return createInput(inputFormat, TypeExtractor.getInputFormatTypes(inputFormat));
    } catch (Exception cause) {
        // Point the user at the overload that takes explicit type information.
        final String message =
                "The type returned by the input format could not be automatically determined. "
                        + "Please specify the TypeInformation of the produced type explicitly by using the "
                        + "'createInput(InputFormat, TypeInformation)' method instead.";
        throw new InvalidProgramException(message, cause);
    }
}
/**
 * Creates an input data stream from the given {@link org.apache.flink.api.common.io.InputFormat}.
 *
 * <p>Every data stream requires information about its element type, so the type produced by the
 * input format has to be determined here. This is attempted via reflection, unless the input format
 * implements the {@link org.apache.flink.api.java.typeutils.ResultTypeQueryable} interface, in which
 * case {@link org.apache.flink.api.java.typeutils.ResultTypeQueryable#getProducedType()} is invoked
 * to obtain the produced data type.
 *
 * <p><b>NOTES ON CHECKPOINTING: </b> For a {@link FileInputFormat}, the source (which executes the
 * {@link ContinuousFileMonitoringFunction}) monitors the path, creates the
 * {@link org.apache.flink.core.fs.FileInputSplit FileInputSplits} to be processed, forwards them to
 * the downstream {@link ContinuousFileReaderOperator} that reads the actual data, and then exits
 * without waiting for the readers to finish. Consequently, no further checkpoint barriers are
 * forwarded once the source has exited, so no checkpoints are taken after that point.
 *
 * @param inputFormat
 * 		The input format used to create the data stream
 * @param <OUT>
 * 		The type of the returned data stream
 * @return The data stream that represents the data created by the input format
 */
@PublicEvolving
public <OUT> DataStreamSource<OUT> createInput(InputFormat<OUT, ?> inputFormat) {
    // Extract the produced type, then delegate to the overload taking explicit type information.
    return createInput(
            inputFormat,
            TypeExtractor.getInputFormatTypes(inputFormat));
}
/**
 * Creates a {@link DataSource} that reads the file at the given path with the given file input format.
 *
 * <p>The path is set on {@code inputFormat} before the produced type is determined through
 * {@link TypeExtractor#getInputFormatTypes}. The input format and the extracted type are then
 * handed to {@code createInput(InputFormat, TypeInformation)}.
 *
 * @param inputFormat The file input format used to read the file; its file path is overwritten.
 * @param filePath The path of the file to read.
 * @return The data source that represents the data read by the input format.
 * @throws IllegalArgumentException If {@code inputFormat} or {@code filePath} is {@code null}.
 * @throws InvalidProgramException If type extraction or the delegated call fails; the original
 *         exception is attached as the cause.
 */
public <X> DataSource<X> readFile(FileInputFormat<X> inputFormat, String filePath) {
    if (inputFormat == null) {
        throw new IllegalArgumentException("InputFormat must not be null.");
    }
    if (filePath == null) {
        throw new IllegalArgumentException("The file path must not be null.");
    }

    inputFormat.setFilePath(new Path(filePath));
    try {
        return createInput(inputFormat, TypeExtractor.getInputFormatTypes(inputFormat));
    } catch (Exception e) {
        // Chain the original exception so the root cause of the failure is not lost.
        throw new InvalidProgramException(
                "The type returned by the input format could not be automatically determined. "
                        + "Please specify the TypeInformation of the produced type explicitly by using the "
                        + "'createInput(InputFormat, TypeInformation)' method instead.",
                e);
    }
}
/**
 * Creates an input data stream from the given {@link org.apache.flink.api.common.io.InputFormat}.
 *
 * <p>Every data stream requires information about its element type, so the type produced by the
 * input format has to be determined here. This is attempted via reflection, unless the input format
 * implements the {@link org.apache.flink.api.java.typeutils.ResultTypeQueryable} interface, in which
 * case {@link org.apache.flink.api.java.typeutils.ResultTypeQueryable#getProducedType()} is invoked
 * to obtain the produced data type.
 *
 * <p><b>NOTES ON CHECKPOINTING: </b> For a {@link FileInputFormat}, the source (which executes the
 * {@link ContinuousFileMonitoringFunction}) monitors the path, creates the
 * {@link org.apache.flink.core.fs.FileInputSplit FileInputSplits} to be processed, forwards them to
 * the downstream {@link ContinuousFileReaderOperator} that reads the actual data, and then exits
 * without waiting for the readers to finish. Consequently, no further checkpoint barriers are
 * forwarded once the source has exited, so no checkpoints are taken after that point.
 *
 * @param inputFormat
 * 		The input format used to create the data stream
 * @param <OUT>
 * 		The type of the returned data stream
 * @return The data stream that represents the data created by the input format
 */
@PublicEvolving
public <OUT> DataStreamSource<OUT> createInput(InputFormat<OUT, ?> inputFormat) {
    // Extract the produced type, then delegate to the overload taking explicit type information.
    return createInput(
            inputFormat,
            TypeExtractor.getInputFormatTypes(inputFormat));
}
/**
 * Creates a {@link DataSource} that reads the file at the given path with the given file input format.
 *
 * <p>The path is set on {@code inputFormat} before the produced type is determined through
 * {@link TypeExtractor#getInputFormatTypes}. The input format and the extracted type are then
 * handed to {@code createInput(InputFormat, TypeInformation)}.
 *
 * @param inputFormat The file input format used to read the file; its file path is overwritten.
 * @param filePath The path of the file to read.
 * @return The data source that represents the data read by the input format.
 * @throws IllegalArgumentException If {@code inputFormat} or {@code filePath} is {@code null}.
 * @throws InvalidProgramException If type extraction or the delegated call fails; the original
 *         exception is attached as the cause.
 */
public <X> DataSource<X> readFile(FileInputFormat<X> inputFormat, String filePath) {
    if (inputFormat == null) {
        throw new IllegalArgumentException("InputFormat must not be null.");
    }
    if (filePath == null) {
        throw new IllegalArgumentException("The file path must not be null.");
    }

    inputFormat.setFilePath(new Path(filePath));
    try {
        return createInput(inputFormat, TypeExtractor.getInputFormatTypes(inputFormat));
    } catch (Exception e) {
        // Chain the original exception so the root cause of the failure is not lost.
        throw new InvalidProgramException(
                "The type returned by the input format could not be automatically determined. "
                        + "Please specify the TypeInformation of the produced type explicitly by using the "
                        + "'createInput(InputFormat, TypeInformation)' method instead.",
                e);
    }
}
/**
 * Creates an input data stream from the given {@link org.apache.flink.api.common.io.InputFormat}.
 *
 * <p>Every data stream requires information about its element type, so the type produced by the
 * input format has to be determined here. This is attempted via reflection, unless the input format
 * implements the {@link org.apache.flink.api.java.typeutils.ResultTypeQueryable} interface, in which
 * case {@link org.apache.flink.api.java.typeutils.ResultTypeQueryable#getProducedType()} is invoked
 * to obtain the produced data type.
 *
 * <p><b>NOTES ON CHECKPOINTING: </b> For a {@link FileInputFormat}, the source (which executes the
 * {@link ContinuousFileMonitoringFunction}) monitors the path, creates the
 * {@link org.apache.flink.core.fs.FileInputSplit FileInputSplits} to be processed, forwards them to
 * the downstream {@link ContinuousFileReaderOperator} that reads the actual data, and then exits
 * without waiting for the readers to finish. Consequently, no further checkpoint barriers are
 * forwarded once the source has exited, so no checkpoints are taken after that point.
 *
 * @param inputFormat
 * 		The input format used to create the data stream
 * @param <OUT>
 * 		The type of the returned data stream
 * @return The data stream that represents the data created by the input format
 */
@PublicEvolving
public <OUT> DataStreamSource<OUT> createInput(InputFormat<OUT, ?> inputFormat) {
    // Extract the produced type, then delegate to the overload taking explicit type information.
    return createInput(
            inputFormat,
            TypeExtractor.getInputFormatTypes(inputFormat));
}
/**
 * Creates a {@link DataSource} that reads the file at the given path with the given file input format.
 *
 * <p>The path is set on {@code inputFormat} before the produced type is determined through
 * {@link TypeExtractor#getInputFormatTypes}. The input format and the extracted type are then
 * handed to {@code createInput(InputFormat, TypeInformation)}.
 *
 * @param inputFormat The file input format used to read the file; its file path is overwritten.
 * @param filePath The path of the file to read.
 * @return The data source that represents the data read by the input format.
 * @throws IllegalArgumentException If {@code inputFormat} or {@code filePath} is {@code null}.
 * @throws InvalidProgramException If type extraction or the delegated call fails; the original
 *         exception is attached as the cause.
 */
public <X> DataSource<X> readFile(FileInputFormat<X> inputFormat, String filePath) {
    if (inputFormat == null) {
        throw new IllegalArgumentException("InputFormat must not be null.");
    }
    if (filePath == null) {
        throw new IllegalArgumentException("The file path must not be null.");
    }

    inputFormat.setFilePath(new Path(filePath));
    try {
        return createInput(inputFormat, TypeExtractor.getInputFormatTypes(inputFormat));
    } catch (Exception e) {
        // Chain the original exception so the root cause of the failure is not lost.
        throw new InvalidProgramException(
                "The type returned by the input format could not be automatically determined. "
                        + "Please specify the TypeInformation of the produced type explicitly by using the "
                        + "'createInput(InputFormat, TypeInformation)' method instead.",
                e);
    }
}
/**
 * Creates a named {@link DataStream} of {@link Row}s that is fed by the given input format.
 *
 * <p>The produced type is determined through {@link TypeExtractor#getInputFormatTypes}. The input
 * format is wrapped in an {@link InputFormatSourceFunction}, which is registered as a source on
 * {@code env} under {@code sourceName}.
 *
 * @param inputFormat The input format to read from; must not be {@code null}.
 * @param sourceName The name given to the source; must not be {@code null}.
 * @return The stream returned by {@code env.addSource}.
 */
protected DataStream<Row> createInput(InputFormat inputFormat, String sourceName) {
    Preconditions.checkNotNull(sourceName);
    Preconditions.checkNotNull(inputFormat);

    // NOTE(review): raw types are used throughout; nothing here verifies that the format produces Row.
    TypeInformation producedType = TypeExtractor.getInputFormatTypes(inputFormat);
    InputFormatSourceFunction sourceFunction = new InputFormatSourceFunction(inputFormat, producedType);

    return env.addSource(sourceFunction, sourceName, producedType);
}