/**
 * Builds the URI of a table by appending the table's {@code PATH} property to the
 * ASCII form of this tablespace's URI.
 *
 * <p>The two parts are joined with exactly one {@code '/'}: a separator is inserted when
 * neither side provides one, and a duplicate is dropped when both sides provide one.
 *
 * @param meta         table metadata whose {@code PATH} property holds the table path
 * @param databaseName not used by this implementation
 * @param tableName    not used by this implementation
 * @return the tablespace URI joined with the table path
 * @throws NullPointerException if the {@code PATH} property is not set on {@code meta}
 */
@Override
public URI getTableUri(TableMeta meta, String databaseName, String tableName) {
  String tablespaceUriString = uri.toASCIIString();
  String tablePath = meta.getProperty(PATH);

  // Fail with a descriptive message instead of an anonymous NPE from startsWith().
  if (tablePath == null) {
    throw new NullPointerException("Table property \"" + PATH + "\" is not set");
  }

  boolean baseEndsWithSlash = tablespaceUriString.endsWith("/");
  boolean pathStartsWithSlash = tablePath.startsWith("/");

  if (!baseEndsWithSlash && !pathStartsWithSlash) {
    // Neither side supplies the separator.
    tablePath = "/" + tablePath;
  } else if (baseEndsWithSlash && pathStartsWithSlash) {
    // Both sides supply it; keep only one so the result has no "//" at the join point.
    tablePath = tablePath.substring(1);
  }

  return URI.create(tablespaceUriString + tablePath);
}
/**
 * Returns the field delimiter configured for a table as bytes.
 *
 * <p>The value of {@code StorageConstants.TEXT_DELIMITER} is read from the table
 * properties (falling back to {@code StorageConstants.DEFAULT_FIELD_DELIMITER}),
 * Java-style escape sequences are unescaped, and the result is encoded with
 * {@code Bytes.UTF8_CHARSET}.
 *
 * @param meta table metadata to read the delimiter from
 * @return the unescaped delimiter bytes
 */
public static byte[] getFieldDelimiter(TableMeta meta) {
  String escapedDelimiter =
      meta.getProperty(StorageConstants.TEXT_DELIMITER, StorageConstants.DEFAULT_FIELD_DELIMITER);
  String delimiter = StringEscapeUtils.unescapeJava(escapedDelimiter);
  return delimiter.getBytes(Bytes.UTF8_CHARSET);
}
}
/**
 * Looks up the table's text field delimiter and converts it to bytes.
 *
 * <p>Uses the {@code StorageConstants.TEXT_DELIMITER} property, or
 * {@code StorageConstants.DEFAULT_FIELD_DELIMITER} when the property is absent. Java escape
 * sequences in the value are unescaped before it is encoded with {@code Bytes.UTF8_CHARSET}.
 *
 * @param meta table metadata holding the delimiter property
 * @return the delimiter as a byte array
 */
public static byte[] getFieldDelimiter(TableMeta meta) {
  final String configured =
      meta.getProperty(StorageConstants.TEXT_DELIMITER, StorageConstants.DEFAULT_FIELD_DELIMITER);
  final String unescaped = StringEscapeUtils.unescapeJava(configured);
  return unescaped.getBytes(Bytes.UTF8_CHARSET);
}
}
public TextFieldSerializerDeserializer(TableMeta meta) { tableTimezone = TimeZone.getTimeZone( meta.getProperty(StorageConstants.TIMEZONE, TAJO_CONF.getSystemTimezone().getID())); }
public TextFieldSerializerDeserializer(TableMeta meta) { tableTimezone = TimeZone.getTimeZone(meta.getProperty(StorageConstants.TIMEZONE, StorageUtil.TAJO_CONF.getSystemTimezone().getID())); }
public CSVLineDeserializer(Schema schema, TableMeta meta, Column [] projected) { super(schema, meta); targetColumnIndexes = PlannerUtil.getTargetIds(schema, projected); // The quote char must be a single ASCII character. hasQuoteChar = meta.containsProperty(StorageConstants.QUOTE_CHAR); quoteChar = meta.getProperty(StorageConstants.QUOTE_CHAR, "\0").getBytes()[0]; }
/**
 * Returns the byte representation of the table's NULL text.
 *
 * <p>Reads {@code StorageConstants.TEXT_NULL} (default {@code NullDatum.DEFAULT_TEXT}) and
 * unescapes Java escape sequences. If the result is empty according to
 * {@code StringUtils.isEmpty}, the text bytes of {@code NullDatum} are returned; otherwise
 * the text is encoded with {@code Bytes.UTF8_CHARSET}.
 *
 * @param meta table metadata to read the NULL text from
 * @return bytes that represent NULL in text form
 */
public static byte [] getNullCharsAsBytes(TableMeta meta) {
  String nullText = StringEscapeUtils.unescapeJava(
      meta.getProperty(StorageConstants.TEXT_NULL, NullDatum.DEFAULT_TEXT));

  return StringUtils.isEmpty(nullText)
      ? NullDatum.get().asTextBytes()
      : nullText.getBytes(Bytes.UTF8_CHARSET);
}
/**
 * Returns the byte representation of a NULL text stored under an arbitrary property key.
 *
 * <p>The property value (or {@code defaultVal} when the key is absent) has its Java escape
 * sequences unescaped. An empty result, per {@code StringUtils.isEmpty}, is replaced by the
 * text bytes of {@code NullDatum}; anything else is encoded with {@code Bytes.UTF8_CHARSET}.
 *
 * @param meta       table metadata to read from
 * @param key        property key holding the NULL text
 * @param defaultVal value to use when {@code key} is not set
 * @return bytes that represent NULL in text form
 */
public static byte[] getNullCharsAsBytes(TableMeta meta, String key, String defaultVal) {
  String unescaped = StringEscapeUtils.unescapeJava(meta.getProperty(key, defaultVal));

  if (!StringUtils.isEmpty(unescaped)) {
    return unescaped.getBytes(Bytes.UTF8_CHARSET);
  }
  return NullDatum.get().asTextBytes();
}
/**
 * Resolves the Avro schema declared in the table properties.
 *
 * <p>A schema literal ({@code StorageConstants.AVRO_SCHEMA_LITERAL}) takes precedence and is
 * parsed directly. Otherwise the schema URL ({@code StorageConstants.AVRO_SCHEMA_URL}) is used:
 * values whose lower-cased form starts with {@code "http"} are loaded through
 * {@code getAvroSchemaFromHttp}, everything else through {@code getAvroSchemaFromFileSystem}.
 *
 * @param meta table metadata holding the schema literal or URL
 * @param conf configuration passed on to the file-system loader
 * @return the resolved Avro schema
 * @throws IOException      declared by this method; raised by code it calls
 * @throws RuntimeException if neither a schema literal nor a schema URL is set
 */
public static Schema getAvroSchema(TableMeta meta, Configuration conf) throws IOException {
  boolean hasLiteral = meta.containsProperty(StorageConstants.AVRO_SCHEMA_LITERAL);
  boolean hasUrl = meta.containsProperty(StorageConstants.AVRO_SCHEMA_URL);

  // The literal wins whenever it is present, regardless of the URL.
  if (hasLiteral) {
    return new Schema.Parser().parse(meta.getProperty(StorageConstants.AVRO_SCHEMA_LITERAL));
  }
  if (!hasUrl) {
    throw new RuntimeException("No Avro schema for table.");
  }

  String location = meta.getProperty(StorageConstants.AVRO_SCHEMA_URL);
  boolean overHttp = location.toLowerCase().startsWith("http");
  return overHttp
      ? getAvroSchemaFromHttp(location)
      : getAvroSchemaFromFileSystem(location, conf);
}
/**
 * Builds reader options from the table properties.
 *
 * <p>Two boolean settings are read, each keyed by the attribute name of its {@code OrcConf}
 * entry and defaulting to the string form of that entry's default value:
 * {@code OrcConf.USE_ZEROCOPY} and {@code OrcConf.SKIP_CORRUPT_DATA}.
 *
 * @param meta table metadata to read the settings from
 * @return options with zero-copy and skip-corrupt-records configured
 */
private static Options buildReaderOptions(TableMeta meta) {
  boolean zeroCopy = Boolean.parseBoolean(
      meta.getProperty(
          OrcConf.USE_ZEROCOPY.getAttribute(),
          String.valueOf(OrcConf.USE_ZEROCOPY.getDefaultValue())));

  boolean skipCorrupt = Boolean.parseBoolean(
      meta.getProperty(
          OrcConf.SKIP_CORRUPT_DATA.getAttribute(),
          String.valueOf(OrcConf.SKIP_CORRUPT_DATA.getDefaultValue())));

  return new Options()
      .useZeroCopy(zeroCopy)
      .skipCorruptRecords(skipCorrupt);
}
public JsonLineDeserializer(Schema schema, TableMeta meta, Column [] projected) { super(schema, meta); projectedPaths = SchemaUtil.convertColumnsToPaths(Lists.newArrayList(projected), true); types = SchemaUtil.buildTypeMap(schema.getAllColumns(), projectedPaths); timezone = TimeZone.getTimeZone(meta.getProperty(StorageConstants.TIMEZONE, StorageUtil.TAJO_CONF.getSystemTimezone().getID())); }
@Override public void init() { // Read the configuration parameters outputFormatString = meta.getProperty(StorageConstants.TEXT_REGEX_OUTPUT_FORMAT_STRING); if (outputFormatString == null) { throw new TajoRuntimeException(new InvalidTablePropertyException(StorageConstants.TEXT_REGEX_OUTPUT_FORMAT_STRING, "Cannot write data into table because \"" + StorageConstants.TEXT_REGEX_OUTPUT_FORMAT_STRING + "\"" + " is not specified in serde properties of the table.")); } tableTimezone = TimeZone.getTimeZone(meta.getProperty(StorageConstants.TIMEZONE, StorageUtil.TAJO_CONF.getSystemTimezone().getID())); nullChars = new String(TextLineSerDe.getNullCharsAsBytes(meta), CharsetUtil.UTF_8); columnNum = schema.size(); }
public ExampleHttpJsonScanner(Configuration conf, Schema schema, TableMeta tableMeta, Fragment fragment) throws IOException { super(conf, schema, tableMeta, fragment); reader = new ExampleHttpJsonLineReader(conf, this.fragment, conf.getInt(READ_BUFFER_SIZE, 128 * StorageUnit.KB)); if (!this.reader.isCompressed()) { splittable = true; } startOffset = this.fragment.getStartKey(); endOffset = this.fragment.getEndKey(); maxAllowedErrorCount = Integer.parseInt(tableMeta.getProperty(TEXT_ERROR_TOLERANCE_MAXNUM, DEFAULT_TEXT_ERROR_TOLERANCE_MAXNUM)); }
/**
 * Maps the table's compression property to a {@code CompressionKind}.
 *
 * <p>The property is keyed by the attribute name of {@code OrcConf.COMPRESS} and defaults to
 * the string form of that entry's default value. The value is compared case-insensitively
 * against ZLIB, SNAPPY and LZO, in that order; any other value yields {@code NONE}.
 *
 * @param meta table metadata to read the compression property from
 * @return the matching compression kind, or {@code CompressionKind.NONE}
 */
private static CompressionKind getCompressionKind(TableMeta meta) {
  String configured = meta.getProperty(
      OrcConf.COMPRESS.getAttribute(),
      String.valueOf(OrcConf.COMPRESS.getDefaultValue()));

  CompressionKind[] recognized = {
      CompressionKind.ZLIB, CompressionKind.SNAPPY, CompressionKind.LZO
  };
  for (CompressionKind candidate : recognized) {
    if (configured.equalsIgnoreCase(candidate.name())) {
      return candidate;
    }
  }
  return CompressionKind.NONE;
}
public DelimitedTextFileScanner(Configuration conf, final Schema schema, final TableMeta meta, final Fragment fragment) throws IOException { super(conf, schema, meta, fragment); reader = new DelimitedLineReader(conf, this.fragment, conf.getInt(READ_BUFFER_SIZE, 128 * StorageUnit.KB)); if (!reader.isCompressed()) { splittable = true; } startOffset = this.fragment.getStartKey(); endOffset = this.fragment.getEndKey(); errorTorrenceMaxNum = Integer.parseInt(meta.getProperty(TEXT_ERROR_TOLERANCE_MAXNUM, DEFAULT_TEXT_ERROR_TOLERANCE_MAXNUM)); }
@Override public void init() { fieldSerDer = new TextFieldSerializerDeserializer(meta); fieldSerDer.init(schema); // Read the configuration parameters inputRegex = meta.getProperty(StorageConstants.TEXT_REGEX); boolean inputRegexIgnoreCase = "true".equalsIgnoreCase( meta.getProperty(StorageConstants.TEXT_REGEX_CASE_INSENSITIVE, "false")); // Parse the configuration parameters if (inputRegex != null) { inputPattern = Pattern.compile(inputRegex, Pattern.DOTALL + (inputRegexIgnoreCase ? Pattern.CASE_INSENSITIVE : 0)); } else { throw new TajoRuntimeException(new InvalidTablePropertyException(StorageConstants.TEXT_REGEX, "This table does not have serde property \"" + StorageConstants.TEXT_REGEX + "\"!")); } if (nullChars != null) { nullChars.release(); } nullChars = TextLineSerDe.getNullChars(meta); }
/**
 * Initializes the ORC writer.
 *
 * <p>Resolves the time zone from the {@code StorageConstants.TIMEZONE} table property (falling
 * back to the system time zone id of {@code StorageUtil.TAJO_CONF}), creates the writer for
 * {@code path}, allocates table statistics when they are enabled, and finally delegates to
 * the superclass initialization.
 *
 * @throws IOException declared by this method; raised by code it calls
 */
@Override
public void init() throws IOException {
  String systemTimezoneId = StorageUtil.TAJO_CONF.getSystemTimezone().getID();
  String timezoneId = meta.getProperty(StorageConstants.TIMEZONE, systemTimezoneId);
  timezone = TimeZone.getTimeZone(timezoneId);

  writer = OrcFile.createWriter(path, buildWriterOptions(conf, meta, schema), timezone);

  if (tableStatsEnabled) {
    this.stats = new TableStatistics(schema, columnStatsEnabled);
  }

  super.init();
}
public JsonLineSerializer(Schema schema, TableMeta meta) { super(schema, meta); projectedPaths = SchemaUtil.convertColumnsToPaths(schema.getAllColumns(), true); types = SchemaUtil.buildTypeMap(schema.getAllColumns(), projectedPaths); timezone = TimeZone.getTimeZone(meta.getProperty(StorageConstants.TIMEZONE, StorageUtil.TAJO_CONF.getSystemTimezone().getID())); }
@Override public void init() throws IOException { inited = true; if (targets == null) { targets = schema.toArray(); } outTuple = new VTuple(targets.length); fragmentSize = Integer.parseInt(meta.getProperty(KafkaStorageConstants.KAFKA_FRAGMENT_SIZE, KafkaStorageConstants.DEFAULT_FRAGMENT_SIZE)); pollTimeout = Long.parseLong(meta.getProperty(KafkaStorageConstants.KAFKA_POLL_TIMEOUT, KafkaStorageConstants.DEFAULT_POLL_TIMEOUT)); // create deserializer. default is DELIMITER('|') text deserializer. deserializer = DelimitedTextFile.getLineSerde(meta).createDeserializer(schema, meta, targets); deserializer.init(); simpleConsumerManager = new SimpleConsumerManager(fragment.getUri(), fragment.getTopicName(), fragment.getPartitionId(), fragmentSize); initOffset(); }
/**
 * Asserts that a table property has the expected value.
 *
 * @param tableName     name of the table to look up via {@code getTableDesc}
 * @param key           property key to read from the table's metadata
 * @param expectedValue value the property is expected to have
 * @throws UndefinedTableException declared by this method; raised by the table lookup
 */
public void assertTablePropertyEquals(String tableName, String key, String expectedValue)
    throws UndefinedTableException {
  TableDesc desc = getTableDesc(tableName);
  String actualValue = desc.getMeta().getProperty(key);
  assertEquals(expectedValue, actualValue);
}