public abstract class CommonRowParser extends java.lang.Object implements DataInputParser
Modifier and Type | Field and Description |
---|---|
protected HexToBinaryConverter |
_hexConverter |
protected int |
_positionKeyByteLength |
protected byte[] |
_positionKeyBytes |
protected java.lang.String |
_relationName |
protected int |
_rowPayloadColumn |
static FarragoParamFieldMetaData[] |
BINARY_PROVENANCE_COLUMNS
Provenance columns shared by all SQLstream-supplied binary parsers
|
java.nio.charset.Charset |
charset |
protected int[] |
columnsToReset |
EcdaReaderContext |
context |
java.util.Map<java.lang.String,TypeParser> |
customParsers |
static char |
HIGH_BYTE |
long |
highwaterMark |
protected java.util.Properties |
initProps |
RowInserter |
inputStmt |
BuffersInputStream |
inputStream |
protected boolean |
isProjected |
long |
lineNo |
static LogLevel |
logLevel |
static char |
LOW_BYTE |
static int |
MAX_LOGGED_PAYLOAD |
protected long |
messageStartPosition |
ExtendedParameterMetaData |
metaData |
static int |
NO_COLUMN |
int |
numColumns |
protected static java.lang.String[] |
OPTIONAL_PROPERTIES |
java.lang.String |
parseError |
protected CommonRowParser.ColumnUpdater[] |
parserInfoHandlers |
long |
parserPosition |
static java.lang.String |
POSITION_KEY_SEPARATOR |
java.util.Properties |
props |
protected static java.lang.String[] |
REQUIRED_PROPERTIES |
static int |
SKIP_ROW_PAYLOAD |
protected static java.lang.String[] |
SPECIAL_COLUMNS |
static FarragoParamFieldMetaData[] |
STRING_PROVENANCE_COLUMNS
Provenance columns shared by all SQLstream-supplied string parsers
|
static java.util.logging.Logger |
tracer |
protected TypeParser[] |
typeParsers |
AUTH_KEYTAB, AUTH_METASTORE_PRINCIPAL, AUTH_METHOD, AUTH_USERNAME, COLUMN_MAPPING_KEY, COLUMN_QUOTE_CHARACTER, CONFIG_PATH, DEFAULT_PARSER_QUEUE_SIZE, DISALLOW_QUOTED_ROW_SEPARATOR, DISCARD_ROW_ON_PARSE_ERROR_KEY, ECDA_SESSION_NAME, FILE_COMPRESSION, FILE_COMPRESSION_BUFFER, FILE_ROTATION_RESPECT_ROWTIME, FILE_ROTATION_ROWCOUNT, FILE_ROTATION_SIZE, FILE_ROTATION_TIME, FILE_ROTATION_WATERMARK_COLUMN, FILE_TYPE, FILENAME_DATE_FORMAT, FILENAME_PATTERN, FILENAME_PREFIX, FILENAME_SUFFIX, FORMAT_CHARSET_KEY, FORMAT_TYPE_KEY, FORMAT_WRITE_HEADER_KEY, FORMATTER_CLASSNAME_KEY, FORMATTER_INCLUDE_ROWTIME, FORMATTER_PREFIX, FORMATTER_TYPE, HDFS_LOG_DIRECTORY, HDFS_OUTPUT_DIR, HDFS_POLLING_INTERVAL, HDFS_TYPE, HIVE_METASTORE_URIS, HIVE_PARTITION_NAME_FORMAT_SUFFIX, HIVE_SCHEMA_NAME, HIVE_SKIP_MSCK, HIVE_TABLE_NAME, HIVE_TABLE_OPTIONS, HIVE_TEMPLATE_SCHEMA_NAME, HIVE_TEMPLATE_TABLE_NAME, HIVE_TYPE, HIVE_URI, INTERNAL_PREFIX, IS_IPV6_KEY, IS_TCP_KEY, JDBC_TYPE, KERBEROS_OPTIONS, KERBEROS_SECURITY, MAX_PUNCT_TIME, MESSAGE_STREAM, MIN_PUNCT_TIME, OPTIONS_QUERY, ORC_BATCH_SIZE, ORC_BLOCK_PADDING, ORC_BLOCK_SIZE, ORC_BLOOM_FILTER_COLUMNS, ORC_BLOOM_FILTER_FPP, ORC_COMPRESS, ORC_COMPRESS_SIZE, ORC_DIRECT_ENCODING_COLUMNS, ORC_OPTION_PREFIX, ORC_ROW_INDEX_STRIDE, ORC_SORT_ORDER, ORC_STRIPE_SIZE, ORC_USER_METADATA_PREFIX, ORC_VERSION, ORIGINAL_FILENAME, PARSER_CHARSET_KEY, PARSER_PREFIX, PARSER_QUEUE_SIZE, PARSER_SKIP_HEADER_KEY, PARSER_TYPE, PATH_SUFFIX, PLUGIN_LEVEL, POSITION_KEY_COL, POSTPROCESS_COMMAND, PROJECTED_COLUMN_MAPPING_KEY, PROV_AMQP_CREATION_TIME, PROV_AMQP_PARTITION, PROV_FILE_SOURCE_FILE, PROV_IBMMQ_APPLICATION_ID_DATA, PROV_IBMMQ_APPLICATION_ORIGIN_DATA, PROV_IBMMQ_GROUP_ID, PROV_IBMMQ_MESSAGE_SEQUENCE_NUMBER, PROV_IBMMQ_PUT_APPLICATION_NAME, PROV_IBMMQ_PUT_APPLICATION_TYPE, PROV_IBMMQ_PUT_DATA_TIME, PROV_IBMMQ_USER_ID, PROV_KAFKA_HEADERS, PROV_KAFKA_KEY, PROV_KAFKA_OFFSET, PROV_KAFKA_PARTITION, PROV_KAFKA_PAYLOAD_PREFIX, PROV_KAFKA_TIMESTAMP, PROV_KAFKA_TOPIC, 
PROV_KINESIS_PARTITION_ID, PROV_KINESIS_SEQUENCE_NUMBER, PROV_KINESIS_SHARD_ID, PROV_MQTT_MESSAGE_ID, PROV_MQTT_TOPIC, PROV_PARSE_ERROR, PROV_PARSE_LINE_NUMBER, PROV_PARSE_POSITION, PROV_ROW_PAYLOAD, PROV_SOCKET_SOURCE_HOST, PROV_SOCKET_SOURCE_PORT, PROVENANCE_PREFIX, QUOTED_COLUMNS, RELATION_NAME, REMOTE_HOST_KEY, REMOTE_PORT_KEY, REPEAT, ROOT_DIRECTORY, ROW_SEPARATOR_CHAR_KEY, ROWTIME_COLUMN_NAME, SEPARATOR_CHAR_KEY, SERVER_HOST_KEY, SERVER_PORT_KEY, SERVER_TYPE, SORT_BY_TIMESTAMP, SQLSTREAM_PREFIX, STARTING_OUTPUT_ROWTIME, STARTING_POSITION, STARTING_TIME, STATIC_FILES, TOY_HASH_BUCKETS, TOY_HASH_COLUMNS, TOY_PARTITION_COLUMNS, TOY_VERBOSE, TYPE_AVRO, TYPE_CSV, TYPE_DISCOVERY, TYPE_FAST_REGEX, TYPE_FCLP, TYPE_JSON, TYPE_KEY_VALUE, TYPE_MULTIROW, TYPE_NONE, TYPE_ORC, TYPE_PROTOBUF, TYPE_TOY, TYPE_VCLP, TYPE_W3C, TYPE_XML, UNKNOWN_TYPE, UNPARSED_TEXT_KEY, UNPARSED_TEXT_LAST_COLUMN, UNPARSED_TEXT_NEW_ROW, UNPARSED_TEXT_TRUNCATE, UNPROJECTED_COLUMNS_KEY, UNPROJECTED_NUM_COLUMNS_KEY, WATERMARK_STORE, WATERMARKED_SINK, WATERMARKS, WS_POSITION_KEY_COL, WS_SINK_NAME_COL
Modifier | Constructor and Description |
---|---|
protected |
CommonRowParser() |
Modifier and Type | Method and Description |
---|---|
protected void |
addParserInfoHandlers(java.util.ArrayList<CommonRowParser.ColumnUpdater> handlers) |
protected java.sql.SQLException |
badCastFromChar(java.lang.String errorMessage)
Wrap an error in a "bad cast" SQLException.
|
void |
closeAllocation() |
static boolean |
compareBytes(byte[] a,
int aIdx,
byte[] b,
int bIdx,
int numCheck) |
void |
discardCurrentRow(java.util.logging.Logger discardTracer)
Discard the current row, writing error information to the trace log and the
global error stream.
|
boolean |
getBoolPropertyValue(java.lang.String key,
boolean defVal) |
java.nio.charset.Charset |
getCharset()
Get the type of character encoding assumed by this parser.
|
java.util.Properties |
getInitProperties() |
int |
getIntPropertyValue(java.lang.String key,
int defVal) |
long |
getParserLineNumber() |
java.lang.CharSequence |
getPositionKey()
Get the position key of the current input row.
|
java.lang.CharSequence |
getPositionKey(java.lang.String relationName,
int columnNumber)
Get the position key of the current input row
|
java.lang.Object[] |
getPositionKeyFields(java.lang.String positionKey)
Split a position key into an array of fields.
|
long |
getRowNumberInPayload()
Get the number of the row within the payload.
|
long |
getRowtimeBounds() |
TypeParser |
getTypeParser(int columnNo) |
void |
incParserLineNumber() |
void |
init(java.util.Properties props)
Initializes this parser and throws an exception if something is
underspecified
|
protected void |
initDefaultTypeParsers(java.util.Properties props,
java.util.Properties initProps) |
protected java.sql.SQLException |
invalidCharacterInNumber(char badCharacter,
byte[] buf,
int start,
int end)
Wrap an error in an "invalid character in number" SQLException.
|
protected java.sql.SQLException |
invalidCharacterInTimestamp()
Generate a SQLException for invalid character in timestamp.
|
protected java.sql.SQLException |
invalidDateTime(java.lang.String errorMessage,
java.lang.Throwable t)
Generate a SQLException for an invalid date/time/timestamp.
|
protected java.sql.SQLException |
invalidDateTime(java.lang.Throwable t)
Generate a SQLException for an invalid date/time/timestamp.
|
boolean |
isLittleEndian(ExtendedParameterMetaData md,
int param)
Return true if the parameter's charset is little endian.
|
protected void |
logStringTruncation(int col,
byte[] buf,
int start,
int length)
Log a string truncation warning.
|
void |
logThrowable(java.util.logging.Logger logger,
java.lang.String refinedMessage,
java.lang.Throwable ex,
SqlStateCodes defaultSQLState,
java.util.logging.Level level,
boolean logStackTrace)
Log an error.
|
boolean |
logUnparsableBytes()
Return true if we should log an error for gibberish columns, but
still fill in a row.
|
void |
logUnparsableBytes(java.util.logging.Logger discardTracer,
java.lang.String message,
java.lang.Throwable detailedError,
int columnNumber,
byte[] sourceBytes,
int sourceBytesIdx,
int length)
Log an ingestion error, including the gibberish bytes
which could not be parsed.
|
static FarragoParamFieldMetaData |
makeFieldDescriptor(java.lang.String fieldName,
int jdbcType,
int precision,
int scale,
boolean isNullable)
Make a field descriptor from the passed-in fields
|
void |
markForDiscarding(java.lang.String sqlState,
java.lang.String errorMessage,
java.lang.Throwable thrown)
Mark this row to be discarded, setting the SQLState and error message
which will be logged along with the current payload.
|
boolean |
mustDiscardThisRow()
Return true if we must discard the current row and log it
to the global error stream.
|
protected long |
parseDateTime(byte[] buf,
int start,
int end,
int[] pattern) |
protected void |
parseDecimal(CommonRowParser.DecimalNumber out,
byte[] buf,
int start,
int end,
boolean sci) |
protected double |
parseDouble(byte[] buf,
int start,
int end,
CommonRowParser.DecimalNumber dec) |
protected long |
parseLong(byte[] buf,
int start,
int end) |
int |
rowPayloadColumn()
Get the 1-based index of the row payload column, if any.
|
void |
setContext(EcdaReaderContext context)
Set the context for this parser.
|
void |
setCustomParser(java.lang.String colName,
TypeParser parser) |
void |
setExecutorService(java.util.concurrent.ExecutorService service)
If this parser uses a threadpool, sets the threadpool to use, otherwise
this method should have no effect.
|
void |
setInserter(RowInserter inserter)
Set the inserter for this parser.
|
void |
setParserError(java.lang.String err) |
void |
setParserLineNumber(long lineNo) |
void |
setParserPosition(long pos) |
static void |
setStringColumn(byte[] buf,
int start,
int end,
RowInserter stmt,
int col,
java.nio.charset.Charset characterSet,
boolean treatEmptyStringAsNull)
Stuff a byte array into a string column in the target stream.
|
boolean |
shouldDiscardBadRows()
Return true if errors cause us to discard rows and log them
to the global error stream.
|
protected java.sql.SQLException |
stringDataRightTruncation()
Create a "string data, right truncation" SQLException
|
void |
submit() |
void |
submit(RowInserter inputStmt) |
void |
submitRow(java.util.logging.Logger logger) |
protected TracePropertyListener |
tracePropertyListener()
Get the trace configuration.
|
protected java.sql.SQLException |
valueOutOfRange(java.lang.String errorMessage)
Wrap an error in an overflow/underflow SQLException
|
static void |
vetProvenanceColumnTypes(java.lang.String parserType,
ExtendedParameterMetaData signature,
FarragoParamFieldMetaData[] supportedProvenanceColumns)
Raise a validation error if a provenance column does not
have the correct datatype.
|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
start
public static final char HIGH_BYTE
public static final char LOW_BYTE
public static final int MAX_LOGGED_PAYLOAD
public static final int SKIP_ROW_PAYLOAD
public static final int NO_COLUMN
public static final java.lang.String POSITION_KEY_SEPARATOR
public static java.util.logging.Logger tracer
public static LogLevel logLevel
protected static final java.lang.String[] REQUIRED_PROPERTIES
protected static final java.lang.String[] OPTIONAL_PROPERTIES
public static final FarragoParamFieldMetaData[] STRING_PROVENANCE_COLUMNS
public static final FarragoParamFieldMetaData[] BINARY_PROVENANCE_COLUMNS
public java.util.Properties props
protected java.util.Properties initProps
public EcdaReaderContext context
public ExtendedParameterMetaData metaData
public int numColumns
public java.util.Map<java.lang.String,TypeParser> customParsers
protected boolean isProjected
public RowInserter inputStmt
public BuffersInputStream inputStream
public long highwaterMark
public long lineNo
public long parserPosition
protected long messageStartPosition
public java.lang.String parseError
public java.nio.charset.Charset charset
protected CommonRowParser.ColumnUpdater[] parserInfoHandlers
protected int[] columnsToReset
protected TypeParser[] typeParsers
protected byte[] _positionKeyBytes
protected int _positionKeyByteLength
protected HexToBinaryConverter _hexConverter
protected int _rowPayloadColumn
protected java.lang.String _relationName
protected static final java.lang.String[] SPECIAL_COLUMNS
public boolean logUnparsableBytes()
public int rowPayloadColumn()
protected TracePropertyListener tracePropertyListener()
public void setContext(EcdaReaderContext context)
DataInputParser
setContext
in interface RuntimeObject<EcdaReaderContext>
setContext
in interface DataInputParser
context
- EcdaReaderContext to use.
public void init(java.util.Properties props) throws java.lang.Exception
init
in interface RuntimeObject<EcdaReaderContext>
java.lang.Exception
public java.util.Properties getInitProperties()
getInitProperties
in interface RuntimeObject<EcdaReaderContext>
public java.nio.charset.Charset getCharset()
DataInputParser
getCharset
in interface DataInputParser
public static FarragoParamFieldMetaData makeFieldDescriptor(java.lang.String fieldName, int jdbcType, int precision, int scale, boolean isNullable)
public static void vetProvenanceColumnTypes(java.lang.String parserType, ExtendedParameterMetaData signature, FarragoParamFieldMetaData[] supportedProvenanceColumns) throws java.lang.Exception
parserType
- Calling code's parser type, e.g., TYPE_CSV
signature
- User-declared column list for this foreign relation
supportedProvenanceColumns
- Provenance columns supported by caller
java.lang.Exception
public static boolean compareBytes(byte[] a, int aIdx, byte[] b, int bIdx, int numCheck)
public static void setStringColumn(byte[] buf, int start, int end, RowInserter stmt, int col, java.nio.charset.Charset characterSet, boolean treatEmptyStringAsNull) throws java.sql.SQLException
buf
- The byte array holding the content
start
- The offset of the first content byte
end
- The end of the content
stmt
- The target stream
col
- The 1-based column index
characterSet
- The character set of the target column.
treatEmptyStringAsNull
- True if empty string means null
java.sql.SQLException
public long getRowtimeBounds() throws java.lang.Exception
java.lang.Exception
protected long parseLong(byte[] buf, int start, int end) throws java.sql.SQLException
java.sql.SQLException
protected java.sql.SQLException invalidCharacterInNumber(char badCharacter, byte[] buf, int start, int end)
badCharacter
- The unexpected character
buf
- Byte buffer containing offending characters
start
- Start offset for the badly formatted number
end
- End offset for the badly formatted number
protected java.sql.SQLException badCastFromChar(java.lang.String errorMessage)
errorMessage
- Message to include in SQLException
protected java.sql.SQLException valueOutOfRange(java.lang.String errorMessage)
errorMessage
- Message to include in SQLException
protected java.sql.SQLException invalidCharacterInTimestamp()
protected java.sql.SQLException invalidDateTime(java.lang.Throwable t)
t
- The trigger cause, if any
protected java.sql.SQLException invalidDateTime(java.lang.String errorMessage, java.lang.Throwable t)
errorMessage
- The error message
t
- The trigger cause, if any
protected java.sql.SQLException stringDataRightTruncation()
public void logThrowable(java.util.logging.Logger logger, java.lang.String refinedMessage, java.lang.Throwable ex, SqlStateCodes defaultSQLState, java.util.logging.Level level, boolean logStackTrace)
logger
- The trace logger
refinedMessage
- Message to log
ex
- Throwable to log
defaultSQLState
- SQLState to use if none supplied by the Throwable
level
- Error severity
logStackTrace
- True if we should include the stack trace in the log
public void markForDiscarding(java.lang.String sqlState, java.lang.String errorMessage, java.lang.Throwable thrown)
public boolean mustDiscardThisRow()
public boolean shouldDiscardBadRows()
public void discardCurrentRow(java.util.logging.Logger discardTracer)
discardTracer
- The caller's trace Logger.
protected void logStringTruncation(int col, byte[] buf, int start, int length)
col
- Column number which incurred the warning
buf
- Byte buffer containing the oversized string
start
- Start offset of the string in the buffer
length
- The length of the string
public void logUnparsableBytes(java.util.logging.Logger discardTracer, java.lang.String message, java.lang.Throwable detailedError, int columnNumber, byte[] sourceBytes, int sourceBytesIdx, int length)
protected void parseDecimal(CommonRowParser.DecimalNumber out, byte[] buf, int start, int end, boolean sci) throws java.sql.SQLException
java.sql.SQLException
protected long parseDateTime(byte[] buf, int start, int end, int[] pattern) throws java.sql.SQLException
java.sql.SQLException
protected double parseDouble(byte[] buf, int start, int end, CommonRowParser.DecimalNumber dec) throws java.sql.SQLException
java.sql.SQLException
public boolean isLittleEndian(ExtendedParameterMetaData md, int param) throws java.sql.SQLException
md
- The metadata to interrogate
param
- The parameter number
java.sql.SQLException
public TypeParser getTypeParser(int columnNo) throws java.sql.SQLException
java.sql.SQLException
protected void initDefaultTypeParsers(java.util.Properties props, java.util.Properties initProps) throws java.lang.Exception
java.lang.Exception
public void setCustomParser(java.lang.String colName, TypeParser parser)
protected void addParserInfoHandlers(java.util.ArrayList<CommonRowParser.ColumnUpdater> handlers) throws java.sql.SQLException
java.sql.SQLException
public void submit(RowInserter inputStmt) throws java.sql.SQLException
java.sql.SQLException
public void submit() throws java.sql.SQLException
java.sql.SQLException
public void submitRow(java.util.logging.Logger logger) throws java.sql.SQLException
java.sql.SQLException
public void setParserPosition(long pos)
public long getParserLineNumber()
public final java.lang.CharSequence getPositionKey()
public final java.lang.CharSequence getPositionKey(java.lang.String relationName, int columnNumber)
relationName
- Not null if we should include a destination relation name in the key
columnNumber
- If NO_COLUMN, then don't include a column number in the key
public long getRowNumberInPayload()
public java.lang.Object[] getPositionKeyFields(java.lang.String positionKey)
positionKey
- The string form of the position key.
public void incParserLineNumber()
public void setParserLineNumber(long lineNo)
public void setParserError(java.lang.String err)
public void setExecutorService(java.util.concurrent.ExecutorService service)
DataInputParser
setExecutorService
in interface DataInputParser
service
- ExecutorService to use
public void closeAllocation()
public boolean getBoolPropertyValue(java.lang.String key, boolean defVal)
public int getIntPropertyValue(java.lang.String key, int defVal) throws java.lang.NumberFormatException
java.lang.NumberFormatException
public void setInserter(RowInserter inserter) throws java.sql.SQLException
DataInputParser
setInserter
in interface DataInputParser
inserter
- RowInserter to send output
java.sql.SQLException
Copyright (C) 2003-2021 SQLstream, Inc.