diff --git a/.gitattributes b/.gitattributes index a748d2ce..0a49a4b9 100644 --- a/.gitattributes +++ b/.gitattributes @@ -31,3 +31,10 @@ Dockerfile* text # .gitattributes export-ignore .gitignore export-ignore + +# napi-rs auto-generates these files from the kernel's `napi-binding/napi/` +# crate; regenerated by `npm run build:native`. Tell git/GitHub they're +# machine-generated so they collapse in diffs and are excluded from +# language stats. +native/sea/index.d.ts linguist-generated=true +native/sea/index.js linguist-generated=true diff --git a/.gitignore b/.gitignore index 99381ce5..c3801f4b 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,12 @@ coverage_unit dist *.DS_Store lib/version.ts + +# SEA native binding — copied/generated from kernel workspace by `npm run build:native`. +# The committed contract is `native/sea/index.d.ts` (TypeScript declarations) and +# `native/sea/index.js` (the napi-rs platform router — small, stable, and required in +# the publish tarball so a missing build step can't ship a tarball that can't load). +# The `.node` binaries are large per-platform artifacts and must NOT be committed; +# in production they arrive via the `@databricks/sql-kernel-*` optional deps. +native/sea/index.node +native/sea/index.*.node diff --git a/.npmignore b/.npmignore index 2bfe597c..448289a7 100644 --- a/.npmignore +++ b/.npmignore @@ -3,6 +3,13 @@ !dist/**/* !thrift/**/* +# SEA napi-rs router shim + TypeScript declarations. The router (index.js) +# selects the per-platform `.node` artifact from `@databricks/sql-kernel-*` +# optionalDependencies (populated when the kernel CI publishes them); +# the .d.ts is the consumer-facing type contract. 
+!native/sea/index.js +!native/sea/index.d.ts + !LICENSE !NOTICE !package.json diff --git a/.prettierignore b/.prettierignore index 9a9ec6bc..4a764095 100644 --- a/.prettierignore +++ b/.prettierignore @@ -11,3 +11,9 @@ coverage dist thrift package-lock.json + +# Generated by napi-rs from the kernel's `napi-binding/napi/` crate; +# regenerated by `npm run build:native`. Format follows napi-rs's +# defaults (no semicolons), not this repo's prettier config. +native/sea/index.d.ts +native/sea/index.js diff --git a/lib/DBSQLClient.ts b/lib/DBSQLClient.ts index 38d55a54..7c6430bc 100644 --- a/lib/DBSQLClient.ts +++ b/lib/DBSQLClient.ts @@ -1,9 +1,7 @@ import thrift from 'thrift'; -import Int64 from 'node-int64'; import { EventEmitter } from 'events'; import TCLIService from '../thrift/TCLIService'; -import { TProtocolVersion } from '../thrift/TCLIService_types'; import IDBSQLClient, { ClientOptions, ConnectionOptions, OpenSessionRequest } from './contracts/IDBSQLClient'; import IDriver from './contracts/IDriver'; import IClientContext, { ClientConfig } from './contracts/IClientContext'; @@ -14,9 +12,12 @@ import IDBSQLSession from './contracts/IDBSQLSession'; import IAuthentication from './connection/contracts/IAuthentication'; import HttpConnection from './connection/connections/HttpConnection'; import IConnectionOptions from './connection/contracts/IConnectionOptions'; -import Status from './dto/Status'; import HiveDriverError from './errors/HiveDriverError'; -import { buildUserAgentString, definedOrError, serializeQueryTags } from './utils'; +import { buildUserAgentString } from './utils'; +import IBackend from './contracts/IBackend'; +import { InternalConnectionOptions } from './contracts/InternalConnectionOptions'; +import ThriftBackend from './thrift-backend/ThriftBackend'; +import SeaBackend from './sea/SeaBackend'; import PlainHttpAuthentication from './connection/auth/PlainHttpAuthentication'; import DatabricksOAuth, { OAuthFlow } from 
'./connection/auth/DatabricksOAuth'; import { @@ -39,19 +40,6 @@ function prependSlash(str: string): string { return str; } -function getInitialNamespaceOptions(catalogName?: string, schemaName?: string) { - if (!catalogName && !schemaName) { - return {}; - } - - return { - initialNamespace: { - catalogName, - schemaName, - }, - }; -} - export type ThriftLibrary = Pick; export default class DBSQLClient extends EventEmitter implements IDBSQLClient, IClientContext { @@ -75,6 +63,8 @@ export default class DBSQLClient extends EventEmitter implements IDBSQLClient, I private readonly sessions = new CloseableCollection(); + private backend?: IBackend; + private static getDefaultLogger(): IDBSQLLogger { if (!this.defaultLogger) { this.defaultLogger = new DBSQLLogger(); @@ -248,38 +238,53 @@ export default class DBSQLClient extends EventEmitter implements IDBSQLClient, I this.connectionProvider = this.createConnectionProvider(options); - const thriftConnection = await this.connectionProvider.getThriftConnection(); - - thriftConnection.on('error', (error: Error) => { - // Error.stack already contains error type and message, so log stack if available, - // otherwise fall back to just error type + message - this.logger.log(LogLevel.error, error.stack || `${error.name}: ${error.message}`); - try { - this.emit('error', error); - } catch (e) { - // EventEmitter will throw unhandled error when emitting 'error' event. - // Since we already logged it few lines above, just suppress this behaviour - } - }); - - thriftConnection.on('reconnecting', (params: { delay: number; attempt: number }) => { - this.logger.log(LogLevel.debug, `Reconnecting, params: ${JSON.stringify(params)}`); - this.emit('reconnecting', params); - }); - - thriftConnection.on('close', () => { - this.logger.log(LogLevel.debug, 'Closing connection.'); - this.emit('close'); - }); + // M0: `useSEA` is consumed via a non-exported internal-options cast so it + // doesn't ship in the public `.d.ts`. 
Mirrors Python's `kwargs.get("use_sea")` + // pattern (see databricks-sql-python/src/databricks/sql/session.py). + const internalOptions = options as ConnectionOptions & InternalConnectionOptions; + this.backend = internalOptions.useSEA + ? new SeaBackend() + : new ThriftBackend({ + context: this, + onConnectionEvent: (event, payload) => this.forwardConnectionEvent(event, payload), + }); - thriftConnection.on('timeout', () => { - this.logger.log(LogLevel.debug, 'Connection timed out.'); - this.emit('timeout'); - }); + await this.backend.connect(options); return this; } + private forwardConnectionEvent(event: 'error' | 'reconnecting' | 'close' | 'timeout', payload?: unknown): void { + switch (event) { + case 'error': { + const error = payload as Error; + this.logger.log(LogLevel.error, error.stack || `${error.name}: ${error.message}`); + try { + this.emit('error', error); + } catch (e) { + // EventEmitter throws when 'error' has no listeners; we've already logged it. + } + return; + } + case 'reconnecting': + this.logger.log(LogLevel.debug, `Reconnecting, params: ${JSON.stringify(payload)}`); + this.emit('reconnecting', payload); + return; + case 'close': + this.logger.log(LogLevel.debug, 'Closing connection.'); + this.emit('close'); + return; + case 'timeout': + this.logger.log(LogLevel.debug, 'Connection timed out.'); + this.emit('timeout'); + // Explicit return mirrors the other cases and protects against + // fall-through if a new event is added below. + // eslint-disable-next-line no-useless-return + return; + // no default + } + } + /** * Starts new session * @public @@ -290,44 +295,20 @@ export default class DBSQLClient extends EventEmitter implements IDBSQLClient, I * const session = await client.openSession(); */ public async openSession(request: OpenSessionRequest = {}): Promise { - // Prepare session configuration - const configuration = request.configuration ? 
{ ...request.configuration } : {}; - - // Add metric view metadata config if enabled - if (this.config.enableMetricViewMetadata) { - configuration['spark.sql.thriftserver.metadata.metricview.enabled'] = 'true'; - } - - // Serialize queryTags dict and set in configuration; takes precedence over configuration.QUERY_TAGS - if (request.queryTags !== undefined) { - const serialized = serializeQueryTags(request.queryTags); - if (serialized) { - configuration.QUERY_TAGS = serialized; - } else { - delete configuration.QUERY_TAGS; - } + if (!this.backend) { + throw new HiveDriverError('DBSQLClient: not connected'); } - - const response = await this.driver.openSession({ - client_protocol_i64: new Int64(TProtocolVersion.SPARK_CLI_SERVICE_PROTOCOL_V8), - ...getInitialNamespaceOptions(request.initialCatalog, request.initialSchema), - configuration, - canUseMultipleCatalogs: true, - }); - - Status.assert(response.status); - const session = new DBSQLSession({ - handle: definedOrError(response.sessionHandle), - context: this, - serverProtocolVersion: response.serverProtocolVersion, - }); + const sessionBackend = await this.backend.openSession(request); + const session = new DBSQLSession({ backend: sessionBackend, context: this }); this.sessions.add(session); return session; } public async close(): Promise { await this.sessions.closeAll(); + await this.backend?.close(); + this.backend = undefined; this.client = undefined; this.connectionProvider = undefined; this.authProvider = undefined; diff --git a/lib/DBSQLOperation.ts b/lib/DBSQLOperation.ts index fe22995d..21b8f0fd 100644 --- a/lib/DBSQLOperation.ts +++ b/lib/DBSQLOperation.ts @@ -1,4 +1,3 @@ -import { stringify, NIL } from 'uuid'; import { Readable } from 'node:stream'; import IOperation, { FetchOptions, @@ -12,91 +11,45 @@ import IOperation, { } from './contracts/IOperation'; import { TGetOperationStatusResp, - TOperationHandle, - TTableSchema, - TSparkDirectResults, TGetResultSetMetadataResp, - TSparkRowSetType, - 
TCloseOperationResp, - TOperationState, + TTableSchema, } from '../thrift/TCLIService_types'; import Status from './dto/Status'; import { LogLevel } from './contracts/IDBSQLLogger'; import OperationStateError, { OperationStateErrorCode } from './errors/OperationStateError'; -import IResultsProvider from './result/IResultsProvider'; -import RowSetProvider from './result/RowSetProvider'; -import JsonResultHandler from './result/JsonResultHandler'; -import ArrowResultHandler from './result/ArrowResultHandler'; -import CloudFetchResultHandler from './result/CloudFetchResultHandler'; -import ArrowResultConverter from './result/ArrowResultConverter'; -import ResultSlicer from './result/ResultSlicer'; -import { definedOrError } from './utils'; import { OperationChunksIterator, OperationRowsIterator } from './utils/OperationIterator'; -import HiveDriverError from './errors/HiveDriverError'; import IClientContext from './contracts/IClientContext'; +import IOperationBackend from './contracts/IOperationBackend'; +import { ResultMetadata } from './contracts/ResultMetadata'; +import ThriftOperationBackend from './thrift-backend/ThriftOperationBackend'; +import { synthesizeThriftStatus, synthesizeThriftResultSetMetadata } from './utils/thriftWireSynthesis'; interface DBSQLOperationConstructorOptions { - handle: TOperationHandle; - directResults?: TSparkDirectResults; + backend: IOperationBackend; context: IClientContext; } -async function delay(ms?: number): Promise { - return new Promise((resolve) => { - setTimeout(() => { - resolve(); - }, ms); - }); -} - export default class DBSQLOperation implements IOperation { private readonly context: IClientContext; - private readonly operationHandle: TOperationHandle; + private readonly backend: IOperationBackend; public onClose?: () => void; - private readonly _data: RowSetProvider; - - private readonly closeOperation?: TCloseOperationResp; - private closed: boolean = false; private cancelled: boolean = false; - private metadata?: 
TGetResultSetMetadataResp; - - private metadataPromise?: Promise; - - private state: TOperationState = TOperationState.INITIALIZED_STATE; - - // Once operation is finished or fails - cache status response, because subsequent calls - // to `getOperationStatus()` may fail with irrelevant errors, e.g. HTTP 404 - private operationStatus?: TGetOperationStatusResp; - - private resultHandler?: ResultSlicer; - - constructor({ handle, directResults, context }: DBSQLOperationConstructorOptions) { - this.operationHandle = handle; - this.context = context; - - const useOnlyPrefetchedResults = Boolean(directResults?.closeOperation); - - if (directResults?.operationStatus) { - this.processOperationStatusResponse(directResults.operationStatus); - } - - this.metadata = directResults?.resultSetMetadata; - this._data = new RowSetProvider( - this.context, - this.operationHandle, - [directResults?.resultSet], - useOnlyPrefetchedResults, - ); - this.closeOperation = directResults?.closeOperation; + constructor(options: DBSQLOperationConstructorOptions) { + this.context = options.context; + this.backend = options.backend; this.context.getLogger().log(LogLevel.debug, `Operation created with id: ${this.id}`); } + public get id() { + return this.backend.id; + } + public iterateChunks(options?: IteratorOptions): IOperationChunksIterator { return new OperationChunksIterator(this, options); } @@ -122,11 +75,6 @@ export default class DBSQLOperation implements IOperation { return Readable.from(iterable, options?.streamOptions); } - public get id() { - const operationId = this.operationHandle?.operationId?.guid; - return operationId ? stringify(operationId) : NIL; - } - /** * Fetches all data * @public @@ -141,8 +89,6 @@ export default class DBSQLOperation implements IOperation { const fetchChunkOptions = { ...options, - // Tell slicer to return raw chunks. 
We're going to process all of them anyway, - // so no need to additionally buffer and slice chunks returned by server disableBuffering: true, }; @@ -168,70 +114,44 @@ export default class DBSQLOperation implements IOperation { public async fetchChunk(options?: FetchOptions): Promise> { await this.failIfClosed(); - if (!this.operationHandle.hasResultSet) { + if (!this.backend.hasResultSet) { return []; } - await this.waitUntilReady(options); - - const resultHandler = await this.getResultHandler(); + await this.waitUntilReadyThroughBackend(options); await this.failIfClosed(); - // All the library code is Promise-based, however, since Promises are microtasks, - // enqueueing a lot of promises may block macrotasks execution for a while. - // Usually, there are no much microtasks scheduled, however, when fetching query - // results (especially CloudFetch ones) it's quite easy to block event loop for - // long enough to break a lot of things. For example, with CloudFetch, after first - // set of files are downloaded and being processed immediately one by one, event - // loop easily gets blocked for enough time to break connection pool. `http.Agent` - // stops receiving socket events, and marks all sockets invalid on the next attempt - // to use them. See these similar issues that helped to debug this particular case - - // https://github.com/nodejs/node/issues/47130 and https://github.com/node-fetch/node-fetch/issues/1735 - // This simple fix allows to clean up a microtasks queue and allow Node to process - // macrotasks as well, allowing the normal operation of other code. Also, this - // fix is added to `fetchChunk` method because, unlike other methods, `fetchChunk` is - // a potential source of issues described above - await new Promise((resolve) => { - setTimeout(resolve, 0); - }); - const defaultMaxRows = this.context.getConfig().fetchChunkDefaultMaxRows; - - const result = resultHandler.fetchNext({ - limit: options?.maxRows ?? 
defaultMaxRows, - disableBuffering: options?.disableBuffering, - }); + const limit = options?.maxRows ?? defaultMaxRows; + const result = await this.backend.fetchChunk({ limit, disableBuffering: options?.disableBuffering }); await this.failIfClosed(); - this.context - .getLogger() - .log( - LogLevel.debug, - `Fetched chunk of size: ${options?.maxRows ?? defaultMaxRows} from operation with id: ${this.id}`, - ); + this.context.getLogger().log(LogLevel.debug, `Fetched chunk of size: ${limit} from operation with id: ${this.id}`); return result; } /** - * Requests operation status + * Requests operation status. Returns the Thrift wire response for + * back-compat with existing user code. On the Thrift backend the response + * is returned verbatim; on any other backend (e.g. SEA) the response is + * synthesized from the neutral {@link IOperationBackend.status} result, + * with Thrift-only fields (`taskStatus`, `numModifiedRows`, etc.) left + * undefined. + * * @param progress * @throws {StatusError} */ public async status(progress: boolean = false): Promise { await this.failIfClosed(); this.context.getLogger().log(LogLevel.debug, `Fetching status for operation with id: ${this.id}`); - - if (this.operationStatus) { - return this.operationStatus; + if (this.backend instanceof ThriftOperationBackend) { + // Zero-loss path: the Thrift backend has the wire response on hand. + return this.backend.thriftStatusResponse(progress); } - - const driver = await this.context.getDriver(); - const response = await driver.getOperationStatus({ - operationHandle: this.operationHandle, - getProgressUpdate: progress, - }); - - return this.processOperationStatusResponse(response); + // Non-Thrift backend: synthesize the Thrift-shaped response from the + // neutral OperationStatus DTO. 
+ const status = await this.backend.status(progress); + return synthesizeThriftStatus(status); } /** @@ -242,18 +162,8 @@ export default class DBSQLOperation implements IOperation { if (this.closed || this.cancelled) { return Status.success(); } - - this.context.getLogger().log(LogLevel.debug, `Cancelling operation with id: ${this.id}`); - - const driver = await this.context.getDriver(); - const response = await driver.cancelOperation({ - operationHandle: this.operationHandle, - }); - Status.assert(response.status); + const result = await this.backend.cancel(); this.cancelled = true; - const result = new Status(response.status); - - // Cancelled operation becomes unusable, similarly to being closed this.onClose?.(); return result; } @@ -266,63 +176,66 @@ export default class DBSQLOperation implements IOperation { if (this.closed || this.cancelled) { return Status.success(); } - - this.context.getLogger().log(LogLevel.debug, `Closing operation with id: ${this.id}`); - - const driver = await this.context.getDriver(); - const response = - this.closeOperation ?? 
- (await driver.closeOperation({ - operationHandle: this.operationHandle, - })); - Status.assert(response.status); + const result = await this.backend.close(); this.closed = true; - const result = new Status(response.status); - this.onClose?.(); return result; } public async finished(options?: FinishedOptions): Promise { await this.failIfClosed(); - await this.waitUntilReady(options); + await this.waitUntilReadyThroughBackend(options); } public async hasMoreRows(): Promise { - // If operation is closed or cancelled - we should not try to get data from it if (this.closed || this.cancelled) { return false; } - // Wait for operation to finish before checking for more rows - // This ensures metadata can be fetched successfully - if (this.operationHandle.hasResultSet) { - await this.waitUntilReady(); + if (this.backend.hasResultSet) { + await this.waitUntilReadyThroughBackend(); } - // If we fetched all the data from server - check if there's anything buffered in result handler - const resultHandler = await this.getResultHandler(); - return resultHandler.hasMore(); + return this.backend.hasMore(); } public async getSchema(options?: GetSchemaOptions): Promise { await this.failIfClosed(); - if (!this.operationHandle.hasResultSet) { + if (!this.backend.hasResultSet) { return null; } - await this.waitUntilReady(options); + await this.waitUntilReadyThroughBackend(options); this.context.getLogger().log(LogLevel.debug, `Fetching schema for operation with id: ${this.id}`); - const metadata = await this.fetchMetadata(); + const metadata = await this.backend.getResultMetadata(); return metadata.schema ?? null; } + public async getResultMetadata(): Promise { + await this.failIfClosed(); + await this.waitUntilReadyThroughBackend(); + return this.backend.getResultMetadata(); + } + + /** + * Fetch result-set metadata as the Thrift wire response. Kept for + * back-compat with existing user code. 
On the Thrift backend the wire + * response is returned verbatim; on any other backend the response is + * synthesized from the neutral {@link ResultMetadata}, with Thrift-only + * fields (`cacheLookupResult`, `uncompressedBytes`, `compressedBytes`, + * `status`) left undefined / defaulted. + * + * Prefer {@link DBSQLOperation.getResultMetadata} in new code. + */ public async getMetadata(): Promise { await this.failIfClosed(); - await this.waitUntilReady(); - return this.fetchMetadata(); + await this.waitUntilReadyThroughBackend(); + if (this.backend instanceof ThriftOperationBackend) { + return this.backend.thriftResultMetadataResponse(); + } + return synthesizeThriftResultSetMetadata(await this.backend.getResultMetadata()); } private async failIfClosed(): Promise { @@ -334,151 +247,20 @@ export default class DBSQLOperation implements IOperation { } } - private async waitUntilReady(options?: WaitUntilReadyOptions) { - if (this.state === TOperationState.FINISHED_STATE) { - return; - } - - let isReady = false; - - while (!isReady) { - // eslint-disable-next-line no-await-in-loop - const response = await this.status(Boolean(options?.progress)); - - if (options?.callback) { - // eslint-disable-next-line no-await-in-loop - await Promise.resolve(options.callback(response)); - } - - switch (response.operationState) { - // For these states do nothing and continue waiting - case TOperationState.INITIALIZED_STATE: - case TOperationState.PENDING_STATE: - case TOperationState.RUNNING_STATE: - break; - - // Operation is completed, so exit the loop - case TOperationState.FINISHED_STATE: - isReady = true; - break; - - // Operation was cancelled, so set a flag and exit the loop (throw an error) - case TOperationState.CANCELED_STATE: + private async waitUntilReadyThroughBackend(options?: WaitUntilReadyOptions) { + try { + await this.backend.waitUntilReady(options); + } catch (err) { + // Reflect terminal states back into facade flags so subsequent calls + // short-circuit via 
failIfClosed(). + if (err instanceof OperationStateError) { + if (err.errorCode === OperationStateErrorCode.Canceled) { this.cancelled = true; - throw new OperationStateError(OperationStateErrorCode.Canceled, response); - - // Operation was closed, so set a flag and exit the loop (throw an error) - case TOperationState.CLOSED_STATE: + } else if (err.errorCode === OperationStateErrorCode.Closed) { this.closed = true; - throw new OperationStateError(OperationStateErrorCode.Closed, response); - - // Error states - throw and exit the loop - case TOperationState.ERROR_STATE: - throw new OperationStateError(OperationStateErrorCode.Error, response); - case TOperationState.TIMEDOUT_STATE: - throw new OperationStateError(OperationStateErrorCode.Timeout, response); - case TOperationState.UKNOWN_STATE: - default: - throw new OperationStateError(OperationStateErrorCode.Unknown, response); + } } - - // If not ready yet - make some delay before the next status requests - if (!isReady) { - // eslint-disable-next-line no-await-in-loop - await delay(100); - } - } - } - - private async fetchMetadata() { - // If metadata is already cached, return it immediately - if (this.metadata) { - return this.metadata; + throw err; } - - // If a fetch is already in progress, wait for it to complete - if (this.metadataPromise) { - return this.metadataPromise; - } - - // Start a new fetch and cache the promise to prevent concurrent fetches - this.metadataPromise = (async () => { - const driver = await this.context.getDriver(); - const metadata = await driver.getResultSetMetadata({ - operationHandle: this.operationHandle, - }); - Status.assert(metadata.status); - this.metadata = metadata; - return metadata; - })(); - - try { - return await this.metadataPromise; - } finally { - // Clear the promise once completed (success or failure) - this.metadataPromise = undefined; - } - } - - private async getResultHandler(): Promise> { - const metadata = await this.fetchMetadata(); - const resultFormat = 
definedOrError(metadata.resultFormat); - - if (!this.resultHandler) { - let resultSource: IResultsProvider> | undefined; - - switch (resultFormat) { - case TSparkRowSetType.COLUMN_BASED_SET: - resultSource = new JsonResultHandler(this.context, this._data, metadata); - break; - case TSparkRowSetType.ARROW_BASED_SET: - resultSource = new ArrowResultConverter( - this.context, - new ArrowResultHandler(this.context, this._data, metadata), - metadata, - ); - break; - case TSparkRowSetType.URL_BASED_SET: - resultSource = new ArrowResultConverter( - this.context, - new CloudFetchResultHandler(this.context, this._data, metadata), - metadata, - ); - break; - // no default - } - - if (resultSource) { - this.resultHandler = new ResultSlicer(this.context, resultSource); - } - } - - if (!this.resultHandler) { - throw new HiveDriverError(`Unsupported result format: ${TSparkRowSetType[resultFormat]}`); - } - - return this.resultHandler; - } - - private processOperationStatusResponse(response: TGetOperationStatusResp) { - Status.assert(response.status); - - this.state = response.operationState ?? 
this.state; - - if (typeof response.hasResultSet === 'boolean') { - this.operationHandle.hasResultSet = response.hasResultSet; - } - - const isInProgress = [ - TOperationState.INITIALIZED_STATE, - TOperationState.PENDING_STATE, - TOperationState.RUNNING_STATE, - ].includes(this.state); - - if (!isInProgress) { - this.operationStatus = response; - } - - return response; } } diff --git a/lib/DBSQLSession.ts b/lib/DBSQLSession.ts index 95715e1b..0e1cc934 100644 --- a/lib/DBSQLSession.ts +++ b/lib/DBSQLSession.ts @@ -2,19 +2,7 @@ import * as fs from 'fs'; import * as path from 'path'; import stream from 'node:stream'; import util from 'node:util'; -import { stringify, NIL } from 'uuid'; -import Int64 from 'node-int64'; import fetch, { HeadersInit } from 'node-fetch'; -import { - TSessionHandle, - TStatus, - TOperationHandle, - TSparkDirectResults, - TSparkArrowTypes, - TSparkParameter, - TProtocolVersion, - TExecuteStatementReq, -} from '../thrift/TCLIService_types'; import IDBSQLSession, { ExecuteStatementOptions, TypeInfoRequest, @@ -31,153 +19,44 @@ import IOperation from './contracts/IOperation'; import DBSQLOperation from './DBSQLOperation'; import Status from './dto/Status'; import InfoValue from './dto/InfoValue'; -import { definedOrError, LZ4, ProtocolVersion, serializeQueryTags } from './utils'; import CloseableCollection from './utils/CloseableCollection'; import { LogLevel } from './contracts/IDBSQLLogger'; import HiveDriverError from './errors/HiveDriverError'; import StagingError from './errors/StagingError'; -import { DBSQLParameter, DBSQLParameterValue } from './DBSQLParameter'; -import ParameterError from './errors/ParameterError'; -import IClientContext, { ClientConfig } from './contracts/IClientContext'; +import IClientContext from './contracts/IClientContext'; +import ISessionBackend from './contracts/ISessionBackend'; +import IOperationBackend from './contracts/IOperationBackend'; // Explicitly promisify a callback-style `pipeline` because 
`node:stream/promises` is not available in Node 14 const pipeline = util.promisify(stream.pipeline); -interface OperationResponseShape { - status: TStatus; - operationHandle?: TOperationHandle; - directResults?: TSparkDirectResults; -} - -export function numberToInt64(value: number | bigint | Int64): Int64 { - if (value instanceof Int64) { - return value; - } - - if (typeof value === 'bigint') { - const buffer = new ArrayBuffer(BigInt64Array.BYTES_PER_ELEMENT); - const view = new DataView(buffer); - view.setBigInt64(0, value, false); // `false` to use big-endian order - return new Int64(Buffer.from(buffer)); - } - - return new Int64(value); -} - -function getDirectResultsOptions(maxRows: number | bigint | Int64 | null | undefined, config: ClientConfig) { - if (maxRows === null) { - return {}; - } - - return { - getDirectResults: { - maxRows: numberToInt64(maxRows ?? config.directResultsDefaultMaxRows), - }, - }; -} - -function getArrowOptions( - config: ClientConfig, - serverProtocolVersion: TProtocolVersion | undefined | null, -): { - canReadArrowResult: boolean; - useArrowNativeTypes?: TSparkArrowTypes; -} { - const { arrowEnabled = true, useArrowNativeTypes = true } = config; - - if (!arrowEnabled || !ProtocolVersion.supportsArrowMetadata(serverProtocolVersion)) { - return { - canReadArrowResult: false, - }; - } - - return { - canReadArrowResult: true, - useArrowNativeTypes: { - timestampAsArrow: useArrowNativeTypes, - decimalAsArrow: useArrowNativeTypes, - complexTypesAsArrow: useArrowNativeTypes, - // TODO: currently unsupported by `apache-arrow` (see https://github.com/streamlit/streamlit/issues/4489) - intervalTypesAsArrow: false, - }, - }; -} - -function getQueryParameters( - namedParameters?: Record, - ordinalParameters?: Array, -): Array { - const namedParametersProvided = namedParameters !== undefined && Object.keys(namedParameters).length > 0; - const ordinalParametersProvided = ordinalParameters !== undefined && ordinalParameters.length > 0; - - if 
(namedParametersProvided && ordinalParametersProvided) { - throw new ParameterError('Driver does not support both ordinal and named parameters.'); - } - - if (!namedParametersProvided && !ordinalParametersProvided) { - return []; - } - - const result: Array = []; - - if (namedParameters !== undefined) { - for (const name of Object.keys(namedParameters)) { - const value = namedParameters[name]; - const param = value instanceof DBSQLParameter ? value : new DBSQLParameter({ value }); - result.push(param.toSparkParameter({ name })); - } - } - - if (ordinalParameters !== undefined) { - for (const value of ordinalParameters) { - const param = value instanceof DBSQLParameter ? value : new DBSQLParameter({ value }); - result.push(param.toSparkParameter()); - } - } - - return result; -} +// Re-export for back-compat with existing imports. +export { numberToInt64 } from './thrift-backend/ThriftSessionBackend'; interface DBSQLSessionConstructorOptions { - handle: TSessionHandle; + backend: ISessionBackend; context: IClientContext; - serverProtocolVersion?: TProtocolVersion; } export default class DBSQLSession implements IDBSQLSession { private readonly context: IClientContext; - private readonly sessionHandle: TSessionHandle; + private readonly backend: ISessionBackend; private isOpen = true; - private serverProtocolVersion?: TProtocolVersion; - public onClose?: () => void; private operations = new CloseableCollection(); - /** - * Helper method to determine if runAsync should be set for metadata operations - * @private - * @returns true if supported by protocol version, undefined otherwise - */ - private getRunAsyncForMetadataOperations(): boolean | undefined { - return ProtocolVersion.supportsAsyncMetadataOperations(this.serverProtocolVersion) ? 
true : undefined; - } - - constructor({ handle, context, serverProtocolVersion }: DBSQLSessionConstructorOptions) { - this.sessionHandle = handle; - this.context = context; - // Get the server protocol version from the provided parameter (from TOpenSessionResp) - this.serverProtocolVersion = serverProtocolVersion; + constructor(options: DBSQLSessionConstructorOptions) { + this.context = options.context; + this.backend = options.backend; this.context.getLogger().log(LogLevel.debug, `Session created with id: ${this.id}`); - this.context.getLogger().log(LogLevel.debug, `Server protocol version: ${this.serverProtocolVersion}`); } public get id() { - const sessionId = this.sessionHandle?.sessionId?.guid; - return sessionId ? stringify(sessionId) : NIL; + return this.backend.id; } /** @@ -190,14 +69,9 @@ export default class DBSQLSession implements IDBSQLSession { */ public async getInfo(infoType: number): Promise { await this.failIfClosed(); - const driver = await this.context.getDriver(); - const operationPromise = driver.getInfo({ - sessionHandle: this.sessionHandle, - infoType, - }); - const response = await this.handleResponse(operationPromise); - Status.assert(response.status); - return new InfoValue(response.infoValue); + const result = await this.backend.getInfo(infoType); + await this.failIfClosed(); + return result; } /** @@ -211,48 +85,13 @@ export default class DBSQLSession implements IDBSQLSession { */ public async executeStatement(statement: string, options: ExecuteStatementOptions = {}): Promise { await this.failIfClosed(); - const driver = await this.context.getDriver(); - const clientConfig = this.context.getConfig(); - - const request = new TExecuteStatementReq({ - sessionHandle: this.sessionHandle, - statement, - queryTimeout: options.queryTimeout ? 
numberToInt64(options.queryTimeout) : undefined, - runAsync: true, - ...getDirectResultsOptions(options.maxRows, clientConfig), - ...getArrowOptions(clientConfig, this.serverProtocolVersion), - }); - - if (ProtocolVersion.supportsParameterizedQueries(this.serverProtocolVersion)) { - request.parameters = getQueryParameters(options.namedParameters, options.ordinalParameters); - } - - const serializedQueryTags = serializeQueryTags(options.queryTags); - if (serializedQueryTags !== undefined) { - request.confOverlay = { ...request.confOverlay, query_tags: serializedQueryTags }; - } - - if (ProtocolVersion.supportsCloudFetch(this.serverProtocolVersion)) { - request.canDownloadResult = options.useCloudFetch ?? clientConfig.useCloudFetch; - } - - if (ProtocolVersion.supportsArrowCompression(this.serverProtocolVersion) && request.canDownloadResult !== true) { - request.canDecompressLZ4Result = (options.useLZ4Compression ?? clientConfig.useLZ4Compression) && Boolean(LZ4()); - } + const opBackend = await this.backend.executeStatement(statement, options); + await this.failIfClosed(); + const operation = this.wrapOperation(opBackend); - const operationPromise = driver.executeStatement(request); - const response = await this.handleResponse(operationPromise); - const operation = this.createOperation(response); - - // If `stagingAllowedLocalPath` is provided - assume that operation possibly may be a staging operation. - // To know for sure, fetch metadata and check a `isStagingOperation` flag. If it happens that it wasn't - // a staging operation - not a big deal, we just fetched metadata earlier, but operation is still usable - // and user can get data from it. - // If `stagingAllowedLocalPath` is not provided - don't do anything to the operation. In a case of regular - // operation, everything will work as usual. In a case of staging operation, it will be processed like any - // other query - it will be possible to get data from it as usual, or use other operation methods. 
+ // Staging detection: only run when stagingAllowedLocalPath is provided. if (options.stagingAllowedLocalPath !== undefined) { - const metadata = await operation.getMetadata(); + const metadata = await operation.getResultMetadata(); if (metadata.isStagingOperation) { const allowedLocalPath = Array.isArray(options.stagingAllowedLocalPath) ? options.stagingAllowedLocalPath @@ -276,7 +115,6 @@ export default class DBSQLSession implements IDBSQLSession { } const row = rows[0] as StagingResponse; - // For REMOVE operation local file is not available, so no need to validate it if (row.localFile !== undefined) { let allowOperation = false; @@ -328,7 +166,6 @@ export default class DBSQLSession implements IDBSQLSession { } const fileStream = fs.createWriteStream(localFile); - // `pipeline` will do all the dirty job for us, including error handling and closing all the streams properly return pipeline(response.body, fileStream); } @@ -337,13 +174,6 @@ export default class DBSQLSession implements IDBSQLSession { const agent = await connectionProvider.getAgent(); const response = await fetch(presignedUrl, { method: 'DELETE', headers, agent }); - // Looks that AWS and Azure have a different behavior of HTTP `DELETE` for non-existing files. - // AWS assumes that - since file already doesn't exist - the goal is achieved, and returns HTTP 200. - // Azure, on the other hand, is somewhat stricter and check if file exists before deleting it. And if - // file doesn't exist - Azure returns HTTP 404. - // - // For us, it's totally okay if file didn't exist before removing. So when we get an HTTP 404 - - // just ignore it and report success. 
This way we can have a uniform library behavior for all clouds if (!response.ok && response.status !== 404) { throw new StagingError(`HTTP error ${response.status} ${response.statusText}`); } @@ -368,7 +198,6 @@ export default class DBSQLSession implements IDBSQLSession { method: 'PUT', headers: { ...headers, - // This header is required by server 'Content-Length': fileInfo.size.toString(), }, agent, @@ -387,16 +216,9 @@ export default class DBSQLSession implements IDBSQLSession { */ public async getTypeInfo(request: TypeInfoRequest = {}): Promise { await this.failIfClosed(); - const driver = await this.context.getDriver(); - const clientConfig = this.context.getConfig(); - - const operationPromise = driver.getTypeInfo({ - sessionHandle: this.sessionHandle, - runAsync: this.getRunAsyncForMetadataOperations(), - ...getDirectResultsOptions(request.maxRows, clientConfig), - }); - const response = await this.handleResponse(operationPromise); - return this.createOperation(response); + const opBackend = await this.backend.getTypeInfo(request); + await this.failIfClosed(); + return this.wrapOperation(opBackend); } /** @@ -407,16 +229,9 @@ export default class DBSQLSession implements IDBSQLSession { */ public async getCatalogs(request: CatalogsRequest = {}): Promise { await this.failIfClosed(); - const driver = await this.context.getDriver(); - const clientConfig = this.context.getConfig(); - - const operationPromise = driver.getCatalogs({ - sessionHandle: this.sessionHandle, - runAsync: this.getRunAsyncForMetadataOperations(), - ...getDirectResultsOptions(request.maxRows, clientConfig), - }); - const response = await this.handleResponse(operationPromise); - return this.createOperation(response); + const opBackend = await this.backend.getCatalogs(request); + await this.failIfClosed(); + return this.wrapOperation(opBackend); } /** @@ -427,18 +242,9 @@ export default class DBSQLSession implements IDBSQLSession { */ public async getSchemas(request: SchemasRequest = {}): 
Promise { await this.failIfClosed(); - const driver = await this.context.getDriver(); - const clientConfig = this.context.getConfig(); - - const operationPromise = driver.getSchemas({ - sessionHandle: this.sessionHandle, - catalogName: request.catalogName, - schemaName: request.schemaName, - runAsync: this.getRunAsyncForMetadataOperations(), - ...getDirectResultsOptions(request.maxRows, clientConfig), - }); - const response = await this.handleResponse(operationPromise); - return this.createOperation(response); + const opBackend = await this.backend.getSchemas(request); + await this.failIfClosed(); + return this.wrapOperation(opBackend); } /** @@ -449,20 +255,9 @@ export default class DBSQLSession implements IDBSQLSession { */ public async getTables(request: TablesRequest = {}): Promise { await this.failIfClosed(); - const driver = await this.context.getDriver(); - const clientConfig = this.context.getConfig(); - - const operationPromise = driver.getTables({ - sessionHandle: this.sessionHandle, - catalogName: request.catalogName, - schemaName: request.schemaName, - tableName: request.tableName, - tableTypes: request.tableTypes, - runAsync: this.getRunAsyncForMetadataOperations(), - ...getDirectResultsOptions(request.maxRows, clientConfig), - }); - const response = await this.handleResponse(operationPromise); - return this.createOperation(response); + const opBackend = await this.backend.getTables(request); + await this.failIfClosed(); + return this.wrapOperation(opBackend); } /** @@ -473,16 +268,9 @@ export default class DBSQLSession implements IDBSQLSession { */ public async getTableTypes(request: TableTypesRequest = {}): Promise { await this.failIfClosed(); - const driver = await this.context.getDriver(); - const clientConfig = this.context.getConfig(); - - const operationPromise = driver.getTableTypes({ - sessionHandle: this.sessionHandle, - runAsync: this.getRunAsyncForMetadataOperations(), - ...getDirectResultsOptions(request.maxRows, clientConfig), - }); - 
const response = await this.handleResponse(operationPromise); - return this.createOperation(response); + const opBackend = await this.backend.getTableTypes(request); + await this.failIfClosed(); + return this.wrapOperation(opBackend); } /** @@ -493,20 +281,9 @@ export default class DBSQLSession implements IDBSQLSession { */ public async getColumns(request: ColumnsRequest = {}): Promise { await this.failIfClosed(); - const driver = await this.context.getDriver(); - const clientConfig = this.context.getConfig(); - - const operationPromise = driver.getColumns({ - sessionHandle: this.sessionHandle, - catalogName: request.catalogName, - schemaName: request.schemaName, - tableName: request.tableName, - columnName: request.columnName, - runAsync: this.getRunAsyncForMetadataOperations(), - ...getDirectResultsOptions(request.maxRows, clientConfig), - }); - const response = await this.handleResponse(operationPromise); - return this.createOperation(response); + const opBackend = await this.backend.getColumns(request); + await this.failIfClosed(); + return this.wrapOperation(opBackend); } /** @@ -517,36 +294,16 @@ export default class DBSQLSession implements IDBSQLSession { */ public async getFunctions(request: FunctionsRequest): Promise { await this.failIfClosed(); - const driver = await this.context.getDriver(); - const clientConfig = this.context.getConfig(); - - const operationPromise = driver.getFunctions({ - sessionHandle: this.sessionHandle, - catalogName: request.catalogName, - schemaName: request.schemaName, - functionName: request.functionName, - runAsync: this.getRunAsyncForMetadataOperations(), - ...getDirectResultsOptions(request.maxRows, clientConfig), - }); - const response = await this.handleResponse(operationPromise); - return this.createOperation(response); + const opBackend = await this.backend.getFunctions(request); + await this.failIfClosed(); + return this.wrapOperation(opBackend); } public async getPrimaryKeys(request: PrimaryKeysRequest): Promise { 
await this.failIfClosed(); - const driver = await this.context.getDriver(); - const clientConfig = this.context.getConfig(); - - const operationPromise = driver.getPrimaryKeys({ - sessionHandle: this.sessionHandle, - catalogName: request.catalogName, - schemaName: request.schemaName, - tableName: request.tableName, - runAsync: this.getRunAsyncForMetadataOperations(), - ...getDirectResultsOptions(request.maxRows, clientConfig), - }); - const response = await this.handleResponse(operationPromise); - return this.createOperation(response); + const opBackend = await this.backend.getPrimaryKeys(request); + await this.failIfClosed(); + return this.wrapOperation(opBackend); } /** @@ -557,22 +314,9 @@ export default class DBSQLSession implements IDBSQLSession { */ public async getCrossReference(request: CrossReferenceRequest): Promise { await this.failIfClosed(); - const driver = await this.context.getDriver(); - const clientConfig = this.context.getConfig(); - - const operationPromise = driver.getCrossReference({ - sessionHandle: this.sessionHandle, - parentCatalogName: request.parentCatalogName, - parentSchemaName: request.parentSchemaName, - parentTableName: request.parentTableName, - foreignCatalogName: request.foreignCatalogName, - foreignSchemaName: request.foreignSchemaName, - foreignTableName: request.foreignTableName, - runAsync: this.getRunAsyncForMetadataOperations(), - ...getDirectResultsOptions(request.maxRows, clientConfig), - }); - const response = await this.handleResponse(operationPromise); - return this.createOperation(response); + const opBackend = await this.backend.getCrossReference(request); + await this.failIfClosed(); + return this.wrapOperation(opBackend); } /** @@ -585,35 +329,20 @@ export default class DBSQLSession implements IDBSQLSession { return Status.success(); } - // Close owned operations one by one, removing successfully closed ones from the list await this.operations.closeAll(); - const driver = await this.context.getDriver(); - const 
response = await driver.closeSession({ - sessionHandle: this.sessionHandle, - }); - // check status for being successful - Status.assert(response.status); + const status = await this.backend.close(); - // notify owner connection this.onClose?.(); this.isOpen = false; this.context.getLogger().log(LogLevel.debug, `Session closed with id: ${this.id}`); - return new Status(response.status); + return status; } - private createOperation(response: OperationResponseShape): DBSQLOperation { - Status.assert(response.status); - const handle = definedOrError(response.operationHandle); - const operation = new DBSQLOperation({ - handle, - directResults: response.directResults, - context: this.context, - }); - + private wrapOperation(backend: IOperationBackend): DBSQLOperation { + const operation = new DBSQLOperation({ backend, context: this.context }); this.operations.add(operation); - return operation; } @@ -622,13 +351,4 @@ export default class DBSQLSession implements IDBSQLSession { throw new HiveDriverError('The session was closed or has expired'); } } - - private async handleResponse(requestPromise: Promise): Promise { - // Currently, after being closed sessions remains usable - server will not - // error out when trying to run operations on closed session. So it's - // basically useless to process any errors here - const result = await requestPromise; - await this.failIfClosed(); - return result; - } } diff --git a/lib/contracts/IBackend.ts b/lib/contracts/IBackend.ts new file mode 100644 index 00000000..2e5edd16 --- /dev/null +++ b/lib/contracts/IBackend.ts @@ -0,0 +1,34 @@ +import { ConnectionOptions, OpenSessionRequest } from './IDBSQLClient'; +import ISessionBackend from './ISessionBackend'; + +/** + * Top-level backend dispatch handle. One instance per `DBSQLClient`, + * chosen at `connect()` time based on the `useSEA` flag and never + * re-selected per-call. 
+ */ +export default interface IBackend { + /** + * Establish backend-level state before any session is opened. Implementations + * consume `options` to build backend-specific connection parameters (e.g. the + * SEA backend derives napi-binding `SeaNativeConnectionOptions` from the auth + * + host fields here). Transport-layer connection providers are owned by + * `DBSQLClient` (via `IClientContext`) and exposed to backends through + * constructor injection. + */ + connect(options: ConnectionOptions): Promise; + + /** + * Open a session. Returned `ISessionBackend` is owned by the caller + * and torn down via its own `close()`. + */ + openSession(request: OpenSessionRequest): Promise; + + /** + * Backend-level teardown. Transport-layer cleanup (connection provider, + * thrift client, auth provider) is owned by `DBSQLClient` and runs + * after this returns. Implementations release backend-internal resources + * here, and MUST be safe to call on a partially-initialized backend + * (i.e. after a failed `connect()`). + */ + close(): Promise; +} diff --git a/lib/contracts/IOperation.ts b/lib/contracts/IOperation.ts index 1d0bb9a1..bbeed622 100644 --- a/lib/contracts/IOperation.ts +++ b/lib/contracts/IOperation.ts @@ -1,6 +1,7 @@ import { Readable, ReadableOptions } from 'node:stream'; import { TGetOperationStatusResp, TTableSchema } from '../../thrift/TCLIService_types'; import Status from '../dto/Status'; +import { ResultMetadata } from './ResultMetadata'; export type OperationStatusCallback = (progress: TGetOperationStatusResp) => unknown; @@ -59,7 +60,10 @@ export default interface IOperation { fetchAll(options?: FetchOptions): Promise>; /** - * Request status of operation + * Request status of operation. Returns the Thrift wire response for + * back-compat. New code should prefer {@link IOperation.getResultMetadata} + * for metadata and may consume the neutral `IOperationBackend.status` via + * a typed downcast when implementing alternative backends. 
* * @param progress */ @@ -90,6 +94,12 @@ export default interface IOperation { */ getSchema(options?: GetSchemaOptions): Promise; + /** + * Fetch result-set metadata in the backend-neutral `ResultMetadata` shape. + * Prefer this over the Thrift-shaped surface for new code. + */ + getResultMetadata(): Promise; + iterateChunks(options?: IteratorOptions): IOperationChunksIterator; iterateRows(options?: IteratorOptions): IOperationRowsIterator; diff --git a/lib/contracts/IOperationBackend.ts b/lib/contracts/IOperationBackend.ts new file mode 100644 index 00000000..4c17020b --- /dev/null +++ b/lib/contracts/IOperationBackend.ts @@ -0,0 +1,55 @@ +import Status from '../dto/Status'; +import { WaitUntilReadyOptions } from './IOperation'; +import { OperationStatus } from './OperationStatus'; +import { ResultMetadata } from './ResultMetadata'; + +/** + * What a `DBSQLOperation` needs from its backend. Returned by + * `ISessionBackend.executeStatement` and the metadata methods. + */ +export default interface IOperationBackend { + /** Operation identifier. */ + readonly id: string; + + /** + * Whether this operation has a result set. Initial value may be derived + * from the create-operation response; implementations MUST refresh it + * from terminal status responses (the Thrift impl updates + * `operationHandle.hasResultSet` inside `processOperationStatusResponse`). + * `readonly` here means external callers cannot reassign the property — + * not that the underlying value is fixed at construction time. + */ + readonly hasResultSet: boolean; + + /** Fetch the next chunk of result rows. */ + fetchChunk(options: { limit: number; disableBuffering?: boolean }): Promise>; + + /** Whether more rows are available beyond what has been fetched. */ + hasMore(): Promise; + + /** + * Poll the backend until the operation reaches a terminal state. 
+ * + * MUST throw `OperationStateError` (with one of `OperationStateErrorCode.{Canceled, + * Closed, Error, Timeout, Unknown}`) on terminal non-success states. The + * `DBSQLOperation` facade depends on `Canceled` and `Closed` codes to mirror + * the operation into its closed/cancelled flags; future implementations must + * use the same error type for the facade to stay in sync. + */ + waitUntilReady(options?: WaitUntilReadyOptions): Promise; + + /** + * Fetch operation status as a neutral `OperationStatus`. Pass `progress: true` + * to request that the backend include a progress payload. + */ + status(progress: boolean): Promise; + + /** Fetch result-set metadata (schema, format, lz4 flag, arrow schema, staging flag). */ + getResultMetadata(): Promise; + + /** Cancel the operation. */ + cancel(): Promise; + + /** Close the operation. Idempotent. */ + close(): Promise; +} diff --git a/lib/contracts/ISessionBackend.ts b/lib/contracts/ISessionBackend.ts new file mode 100644 index 00000000..2404dc68 --- /dev/null +++ b/lib/contracts/ISessionBackend.ts @@ -0,0 +1,60 @@ +import IOperationBackend from './IOperationBackend'; +import { + ExecuteStatementOptions, + TypeInfoRequest, + CatalogsRequest, + SchemasRequest, + TablesRequest, + TableTypesRequest, + ColumnsRequest, + FunctionsRequest, + PrimaryKeysRequest, + CrossReferenceRequest, +} from './IDBSQLSession'; +import Status from '../dto/Status'; +import InfoValue from '../dto/InfoValue'; + +/** + * What a `DBSQLSession` needs from its backend. Returned by + * `IBackend.openSession()`. Lifecycle tied to a single `DBSQLSession`. + */ +export default interface ISessionBackend { + /** Session identifier. */ + readonly id: string; + + /** Returns general information about the data source. */ + getInfo(infoType: number): Promise; + + /** Executes DDL/DML statements. */ + executeStatement(statement: string, options: ExecuteStatementOptions): Promise; + + /** Information about supported data types. 
*/ + getTypeInfo(request: TypeInfoRequest): Promise; + + /** List of catalogs. */ + getCatalogs(request: CatalogsRequest): Promise; + + /** List of schemas. */ + getSchemas(request: SchemasRequest): Promise; + + /** List of tables. */ + getTables(request: TablesRequest): Promise; + + /** List of supported table types. */ + getTableTypes(request: TableTypesRequest): Promise; + + /** Full column information for a table. */ + getColumns(request: ColumnsRequest): Promise; + + /** Information about a function. */ + getFunctions(request: FunctionsRequest): Promise; + + /** Primary keys of a table. */ + getPrimaryKeys(request: PrimaryKeysRequest): Promise; + + /** Foreign-key relationships between two tables. */ + getCrossReference(request: CrossReferenceRequest): Promise; + + /** Close the session. Idempotent. */ + close(): Promise; +} diff --git a/lib/contracts/InternalConnectionOptions.ts b/lib/contracts/InternalConnectionOptions.ts new file mode 100644 index 00000000..a115aa47 --- /dev/null +++ b/lib/contracts/InternalConnectionOptions.ts @@ -0,0 +1,21 @@ +/** + * Internal, non-exported extension of `ConnectionOptions`. Carries M0-only + * flags that should not appear in the published `.d.ts`. + * + * Matches the Python connector pattern: there, `use_sea` is consumed via + * `kwargs.get("use_sea", False)` and is intentionally absent from the typed + * signature (see `databricks-sql-python/src/databricks/sql/session.py`). + * + * Callers cast `ConnectionOptions` to this type *only* at the read site + * inside the driver; user code that wants to set `useSEA` may still do so + * via an untyped object literal — the option is not part of the public + * contract and may be removed without notice. + */ +export interface InternalConnectionOptions { + /** + * Opt-in flag to dispatch through the Statement Execution API (SEA) + * backend instead of the default Thrift backend. Defaults to `false`. + * @internal Not stable; M0 stub only. 
+ */ + useSEA?: boolean; +} diff --git a/lib/contracts/OperationStatus.ts b/lib/contracts/OperationStatus.ts new file mode 100644 index 00000000..7f167aba --- /dev/null +++ b/lib/contracts/OperationStatus.ts @@ -0,0 +1,56 @@ +/** + * Backend-neutral operation state. Mirrors the kernel/pyo3 `StatementStatus` + * and the Python connector's `CommandState`, so a SEA `IOperationBackend` + * implementer can return these without depending on the Thrift wire enum. + * + * Thrift mapping (in `ThriftOperationBackend.adaptOperationStatus`): + * - INITIALIZED_STATE, PENDING_STATE → Pending + * - RUNNING_STATE → Running + * - FINISHED_STATE → Succeeded + * - CANCELED_STATE → Cancelled + * - CLOSED_STATE → Closed + * - ERROR_STATE, TIMEDOUT_STATE → Failed + * - UKNOWN_STATE / anything else → Unknown + */ +export enum OperationState { + Pending = 'Pending', + Running = 'Running', + Succeeded = 'Succeeded', + Failed = 'Failed', + Cancelled = 'Cancelled', + Closed = 'Closed', + Unknown = 'Unknown', +} + +/** + * Neutral status snapshot returned by `IOperationBackend.status()`. Backends + * adapt their wire format at the boundary; callers in `DBSQLOperation` and + * `IOperationBackend.waitUntilReady` switch on `state` alone. + * + * Fields beyond `state` are best-effort and may be undefined depending on + * what the backend exposes. + */ +export interface OperationStatus { + /** Current operation state. */ + state: OperationState; + + /** + * Whether this operation has produced (or is producing) a result set. + * Some backends only know this after the operation reaches a terminal + * state — undefined means "no signal from this backend". + */ + hasResultSet?: boolean; + + /** Human-readable error/display message, if the backend supplied one. */ + errorMessage?: string; + + /** SQL state code (e.g. "42000"), if available. */ + sqlState?: string; + + /** + * Opaque progress payload as returned by the backend when callers pass + * `progress: true`. 
Treated as untyped by the facade — passed through + * to `WaitUntilReadyOptions.callback` for the consumer to interpret. + */ + progressUpdateResponse?: unknown; +} diff --git a/lib/contracts/ResultMetadata.ts b/lib/contracts/ResultMetadata.ts new file mode 100644 index 00000000..5fc09a79 --- /dev/null +++ b/lib/contracts/ResultMetadata.ts @@ -0,0 +1,39 @@ +import { TTableSchema } from '../../thrift/TCLIService_types'; + +/** + * Backend-neutral result-format taxonomy. Mirrors the three on-wire shapes + * `ThriftOperationBackend` actually dispatches on (`COLUMN_BASED_SET`, + * `ARROW_BASED_SET`, `URL_BASED_SET`); a SEA implementer surfaces the same + * three so result-handling stays format-agnostic. + */ +export enum ResultFormat { + ColumnBased = 'COLUMN_BASED', + ArrowBased = 'ARROW_BASED', + UrlBased = 'URL_BASED', +} + +/** + * Neutral result-set metadata returned by `IOperationBackend.getResultMetadata()`. + * + * `schema` keeps the Thrift `TTableSchema` shape for now because the public + * `DBSQLOperation.getSchema()` and `getMetadata()` already expose it on + * `IOperation`; carrying it across the boundary preserves back-compat. The + * SEA backend will adapt its column descriptors into the same shape until + * the public IOperation surface is migrated in a later PR. + */ +export interface ResultMetadata { + /** Column schema; undefined if the operation has no result set. */ + schema?: TTableSchema; + + /** Wire format the result handler should dispatch on. */ + resultFormat: ResultFormat; + + /** Whether the result payload is LZ4-compressed. */ + lz4Compressed?: boolean; + + /** Optional Arrow IPC schema bytes (for ARROW_BASED / URL_BASED formats). */ + arrowSchema?: Buffer; + + /** True iff the operation is a staging (PUT/GET/REMOVE) operation.
*/ + isStagingOperation: boolean; +} diff --git a/lib/sea/SeaBackend.ts b/lib/sea/SeaBackend.ts new file mode 100644 index 00000000..43958679 --- /dev/null +++ b/lib/sea/SeaBackend.ts @@ -0,0 +1,23 @@ +import IBackend from '../contracts/IBackend'; +import ISessionBackend from '../contracts/ISessionBackend'; +import { ConnectionOptions, OpenSessionRequest } from '../contracts/IDBSQLClient'; +import HiveDriverError from '../errors/HiveDriverError'; + +const NOT_IMPLEMENTED = 'SEA backend not implemented yet — wired in sea-napi-binding feature'; + +export default class SeaBackend implements IBackend { + // eslint-disable-next-line @typescript-eslint/no-unused-vars, class-methods-use-this + public async connect(options: ConnectionOptions): Promise { + throw new HiveDriverError(NOT_IMPLEMENTED); + } + + // eslint-disable-next-line @typescript-eslint/no-unused-vars, class-methods-use-this + public async openSession(request: OpenSessionRequest): Promise { + throw new HiveDriverError(NOT_IMPLEMENTED); + } + + // No-op so DBSQLClient.close() can finish its state-clearing block after a + // failed useSEA: true connect. Real teardown lands with the M1 SEA impl. + // eslint-disable-next-line @typescript-eslint/no-empty-function, class-methods-use-this + public async close(): Promise {} +} diff --git a/lib/sea/SeaErrorMapping.ts b/lib/sea/SeaErrorMapping.ts new file mode 100644 index 00000000..7e8a5534 --- /dev/null +++ b/lib/sea/SeaErrorMapping.ts @@ -0,0 +1,141 @@ +import HiveDriverError from '../errors/HiveDriverError'; +import AuthenticationError from '../errors/AuthenticationError'; +import OperationStateError, { OperationStateErrorCode } from '../errors/OperationStateError'; +import ParameterError from '../errors/ParameterError'; + +/** + * Shape of the kernel error surfaced by the napi-binding's `napi_err_from_kernel`. 
+ * + * The Rust kernel's `kernel_error::Error` is exposed as a `JsError` whose + * properties mirror the Rust struct: the `ErrorCode` variant name (as a string), + * the message, and an optional SQLSTATE (either taken from the structured + * server response or recovered via `extract_sqlstate_from_message`). + */ +export interface KernelErrorShape { + /** Kernel `ErrorCode` variant name, e.g. `"Unauthenticated"`, `"SqlError"`. */ + code: string; + /** Human-readable error message. */ + message: string; + /** Optional SQLSTATE — five-char alphanumeric, when the kernel was able to surface it. */ + sqlstate?: string; +} + +/** + * Kernel `ErrorCode` variants — the 13 variants of the `#[non_exhaustive]` enum + * defined in `src/kernel_error.rs:66-134`. + * + * Kept here as a literal type rather than an `enum` so test exhaustiveness checks + * and runtime `code` strings are guaranteed to stay in lockstep with the kernel. + */ +export type KernelErrorCode = + | 'InvalidArgument' + | 'Unauthenticated' + | 'PermissionDenied' + | 'NotFound' + | 'ResourceExhausted' + | 'Unavailable' + | 'Timeout' + | 'Cancelled' + | 'DataLoss' + | 'Internal' + | 'InvalidStatementHandle' + | 'NetworkError' + | 'SqlError'; + +/** + * An `Error` with a preserved SQLSTATE on the `sqlState` property. Used as the + * narrowed return type of {@link mapKernelErrorToJsError} so callers that need + * the SQLSTATE can `error.sqlState` without an `any` cast. + */ +export interface ErrorWithSqlState extends Error { + sqlState?: string; +} + +/** + * Attach the kernel's SQLSTATE to the JS error object via the `sqlState` property. + * The driver has no pre-existing `sqlState` convention (no other error class + * sets it today) so this single helper defines it for the SEA path. 
+ */ +function attachSqlState(error: ErrorWithSqlState, sqlstate?: string): ErrorWithSqlState { + if (sqlstate !== undefined) { + // Using Object.defineProperty so the property is non-enumerable but still + // visible via direct access — matches the way Node attaches `.code` to system errors. + Object.defineProperty(error, 'sqlState', { + value: sqlstate, + writable: true, + enumerable: false, + configurable: true, + }); + } + return error; +} + +/** + * Map a kernel error (as surfaced by the napi-binding) to the appropriate JS + * driver error class. + * + * M0 mapping table: + * Unauthenticated, PermissionDenied → AuthenticationError + * Cancelled → OperationStateError(Canceled) + * Timeout → OperationStateError(Timeout) + * InvalidArgument → ParameterError + * NetworkError, Unavailable, + * NotFound, ResourceExhausted, + * DataLoss, Internal, + * InvalidStatementHandle, SqlError → HiveDriverError + * + * Unknown `code` values (e.g. if the kernel adds a new variant) fall through + * to HiveDriverError so the driver never silently drops an error. The kernel's + * `ErrorCode` is `#[non_exhaustive]` so this can legitimately happen. + * + * SQLSTATE, when present, is attached on `error.sqlState` regardless of which + * class is returned. + */ +export function mapKernelErrorToJsError(kErr: KernelErrorShape): ErrorWithSqlState { + const { code, message, sqlstate } = kErr; + + let error: ErrorWithSqlState; + + switch (code as KernelErrorCode) { + case 'Unauthenticated': + case 'PermissionDenied': + error = new AuthenticationError(message); + break; + + case 'Cancelled': + // OperationStateError with the Canceled code carries the kernel message + // through the response.displayMessage fallback path. 
+ error = new OperationStateError(OperationStateErrorCode.Canceled); + error.message = message; + break; + + case 'Timeout': + error = new OperationStateError(OperationStateErrorCode.Timeout); + error.message = message; + break; + + case 'InvalidArgument': + error = new ParameterError(message); + break; + + // All remaining kernel ErrorCode variants map to the base driver error class. + // M0 intentionally does not introduce new error classes; M1 may add nuance. + case 'NotFound': + case 'ResourceExhausted': + case 'Unavailable': + case 'DataLoss': + case 'Internal': + case 'InvalidStatementHandle': + case 'NetworkError': + case 'SqlError': + error = new HiveDriverError(message); + break; + + default: + // Unknown/future kernel variant — never drop the error, surface as base class. + error = new HiveDriverError(message); + break; + } + + return attachSqlState(error, sqlstate); +} diff --git a/lib/sea/SeaNativeLoader.ts b/lib/sea/SeaNativeLoader.ts new file mode 100644 index 00000000..b4ac71ff --- /dev/null +++ b/lib/sea/SeaNativeLoader.ts @@ -0,0 +1,209 @@ +// Copyright (c) 2026 Databricks, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/** + * Lazy loader for the SEA (Statement Execution API) native binding. 
+ * + * Mirrors the load-failure-tolerant pattern of `lib/utils/lz4.ts`: the + * `.node` artifact ships via per-platform optional dependencies + * (`@databricks/sql-kernel-`), so its absence must not crash + * a Thrift-only consumer of the driver. Callers that actually need + * SEA construct a {@link SeaNativeLoader} (or use the process-global + * {@link getSeaNative}) which throws a structured error if the binding + * could not be loaded. + * + * M0 publishes a single triple (`linux-x64-gnu`); see + * `native/sea/README.md` for the supported-platform policy. + */ + +import type { + Connection as NativeConnection, + Statement as NativeStatement, + ConnectionOptions as NativeConnectionOptions, + ArrowBatch as NativeArrowBatch, + ArrowSchema as NativeArrowSchema, +} from '../../native/sea'; + +// SEA-prefixed re-exports. The kernel-generated `.d.ts` keeps the +// napi-rs default names (`ConnectionOptions`, `ArrowBatch`, …); we +// disambiguate on the TS-wrapper side so these never collide with the +// Thrift-side `ConnectionOptions` (lib/contracts/IDBSQLClient.ts) or +// `ArrowBatch` (lib/result/utils.ts) when imported elsewhere. +export type SeaConnectionOptions = NativeConnectionOptions; +export type SeaArrowBatch = NativeArrowBatch; +export type SeaArrowSchema = NativeArrowSchema; +export type SeaConnection = NativeConnection; +export type SeaStatement = NativeStatement; + +/** + * The full native binding surface, derived from the generated module + * so it can never drift from the `.d.ts` contract: when the kernel + * adds or renames a free function / class, this type follows + * automatically and `defaultRequire`'s cast stays correct. + */ +export type SeaNativeBinding = typeof import('../../native/sea'); + +const MIN_NODE_MAJOR = 18; + +function detectNodeMajor(): number { + // `process.version` is `vX.Y.Z`; parseInt stops at the first non-digit. 
+ return parseInt(process.version.slice(1), 10); +} + +function platformLabel(): string { + return `${process.platform}-${process.arch}`; +} + +function loadFailureHint(err: NodeJS.ErrnoException): string { + const platform = platformLabel(); + // Do not name a concrete package: the published name uses the napi-rs + // triple (e.g. `-linux-x64-gnu` / `-linux-x64-musl` / `-win32-x64-msvc`), + // not the bare `${platform}` shown here, so a literal example would + // 404. Point at the README's supported-triple list instead. + const installHint = + 'Install the matching @databricks/sql-kernel-* optional dependency for your platform ' + + '(see native/sea/README.md for the supported triples; M0 ships linux-x64-gnu only).'; + if (err.code === 'MODULE_NOT_FOUND') { + return `SEA native binding not installed for platform ${platform} on Node ${process.version}. ${installHint}`; + } + if (err.code === 'ERR_DLOPEN_FAILED') { + // Surface the underlying dlerror string (e.g. `GLIBC_2.32 not found`) + // plus concrete remediation — without it the cause is invisible. + return ( + `SEA native binding present but failed to dlopen on platform ${platform} / Node ${process.version}: ` + + `${err.message}. Common causes: glibc/musl mismatch (e.g. Alpine Linux — install the -musl variant), ` + + `Node ABI mismatch (try \`rm -rf node_modules && npm install\`), or CPU-architecture mismatch. ` + + `The binding requires Node >=${MIN_NODE_MAJOR}.` + ); + } + return `SEA native binding failed to load on platform ${platform} / Node ${process.version}: ${err.message}`; +} + +/** + * Default loader: resolves `native/sea/index.js` (the napi-rs router), + * which selects the per-platform `.node`. `.js` is omitted so eslint's + * `import/extensions` rule accepts the call. 
+ */ +function defaultRequire(): SeaNativeBinding { + // eslint-disable-next-line @typescript-eslint/no-var-requires, global-require + return require('../../native/sea') as SeaNativeBinding; +} + +/** + * Verify the loaded module exposes the surface the driver depends on. + * Catches kernel-side renames at load time rather than letting them + * surface as `undefined is not a function` deep in a call path. + */ +function assertBindingShape(binding: SeaNativeBinding): void { + const missing: string[] = []; + if (typeof binding.version !== 'function') missing.push('version'); + if (typeof binding.openSession !== 'function') missing.push('openSession'); + if (typeof binding.Connection !== 'function') missing.push('Connection'); + if (typeof binding.Statement !== 'function') missing.push('Statement'); + if (missing.length > 0) { + throw new Error( + `SEA native binding loaded but is missing expected export(s): ${missing.join(', ')}. ` + + `The kernel-generated binding and the JS loader are out of sync.`, + ); + } +} + +/** + * Loads and caches the SEA native binding. Exposed as a class with an + * injectable `load` seam so consumers (e.g. `SeaBackend`) can be unit + * tested with a stub binding instead of requiring a real `.node` on the + * test machine. Most production code uses the process-global default + * via {@link getSeaNative} / {@link tryGetSeaNative}. + */ +export class SeaNativeLoader { + private cached: SeaNativeBinding | null | undefined; + + private cachedError: Error | undefined; + + constructor(private readonly load: () => SeaNativeBinding = defaultRequire) {} + + private tryLoad(): SeaNativeBinding | undefined { + const nodeMajor = detectNodeMajor(); + // Fail closed: if we cannot determine the Node major (NaN) or it is + // below the floor, refuse the load and fall back to Thrift. 
+ if (!Number.isFinite(nodeMajor) || nodeMajor < MIN_NODE_MAJOR) { + this.cachedError = new Error( + `SEA native binding requires Node >=${MIN_NODE_MAJOR}; running Node ${process.version}. ` + + `Continue using the Thrift backend on this runtime.`, + ); + return undefined; + } + + try { + const binding = this.load(); + assertBindingShape(binding); + return binding; + } catch (err) { + if (err instanceof Error && 'code' in err) { + this.cachedError = new Error(loadFailureHint(err as NodeJS.ErrnoException)); + } else if (err instanceof Error) { + // Shape-check failure or any other Error — preserve its message. + this.cachedError = err; + } else { + this.cachedError = new Error(`SEA native binding failed to load with non-standard error: ${String(err)}`); + } + return undefined; + } + } + + /** + * Returns the loaded native binding. Throws a structured error if the + * binding is unavailable on this platform / Node version. + */ + get(): SeaNativeBinding { + if (this.cached === undefined) { + this.cached = this.tryLoad() ?? null; + } + if (this.cached === null) { + throw this.cachedError ?? new Error('SEA native binding unavailable'); + } + return this.cached; + } + + /** + * Returns the loaded binding or `undefined` if it could not be + * loaded. Use this for capability-detection at startup; use + * {@link get} at the point where SEA is actually required. + */ + tryGet(): SeaNativeBinding | undefined { + if (this.cached === undefined) { + this.cached = this.tryLoad() ?? null; + } + return this.cached ?? undefined; + } +} + +// Process-global default instance + thin convenience wrappers. +const defaultLoader = new SeaNativeLoader(); + +/** + * Returns the loaded native binding from the process-global loader. + * Throws a structured error if the binding is unavailable. 
+ */ +export function getSeaNative(): SeaNativeBinding { + return defaultLoader.get(); +} + +/** + * Returns the loaded binding from the process-global loader, or + * `undefined` if it could not be loaded. + */ +export function tryGetSeaNative(): SeaNativeBinding | undefined { + return defaultLoader.tryGet(); +} diff --git a/lib/thrift-backend/ThriftBackend.ts b/lib/thrift-backend/ThriftBackend.ts new file mode 100644 index 00000000..5e0e7570 --- /dev/null +++ b/lib/thrift-backend/ThriftBackend.ts @@ -0,0 +1,100 @@ +import Int64 from 'node-int64'; +import IBackend from '../contracts/IBackend'; +import ISessionBackend from '../contracts/ISessionBackend'; +import IClientContext from '../contracts/IClientContext'; +import { OpenSessionRequest } from '../contracts/IDBSQLClient'; +import { TProtocolVersion } from '../../thrift/TCLIService_types'; +import Status from '../dto/Status'; +import { definedOrError, serializeQueryTags } from '../utils'; +import ThriftSessionBackend from './ThriftSessionBackend'; + +function getInitialNamespaceOptions(catalogName?: string, schemaName?: string) { + if (!catalogName && !schemaName) { + return {}; + } + + return { + initialNamespace: { + catalogName, + schemaName, + }, + }; +} + +interface ThriftBackendOptions { + context: IClientContext; + onConnectionEvent: (event: 'error' | 'reconnecting' | 'close' | 'timeout', payload?: unknown) => void; +} + +export default class ThriftBackend implements IBackend { + private readonly context: IClientContext; + + private readonly onConnectionEvent: ThriftBackendOptions['onConnectionEvent']; + + constructor({ context, onConnectionEvent }: ThriftBackendOptions) { + this.context = context; + this.onConnectionEvent = onConnectionEvent; + } + + public async connect(): Promise { + // The connection provider is owned by DBSQLClient (it implements IClientContext). + // We only need to wire the EventEmitter listeners through this backend. 
+ const connectionProvider = await this.context.getConnectionProvider(); + const thriftConnection = await connectionProvider.getThriftConnection(); + + thriftConnection.on('error', (error: Error) => { + this.onConnectionEvent('error', error); + }); + + thriftConnection.on('reconnecting', (params: { delay: number; attempt: number }) => { + this.onConnectionEvent('reconnecting', params); + }); + + thriftConnection.on('close', () => { + this.onConnectionEvent('close'); + }); + + thriftConnection.on('timeout', () => { + this.onConnectionEvent('timeout'); + }); + } + + public async openSession(request: OpenSessionRequest): Promise { + const driver = await this.context.getDriver(); + const config = this.context.getConfig(); + + const configuration = request.configuration ? { ...request.configuration } : {}; + + if (config.enableMetricViewMetadata) { + configuration['spark.sql.thriftserver.metadata.metricview.enabled'] = 'true'; + } + + if (request.queryTags !== undefined) { + const serialized = serializeQueryTags(request.queryTags); + if (serialized) { + configuration.QUERY_TAGS = serialized; + } else { + delete configuration.QUERY_TAGS; + } + } + + const response = await driver.openSession({ + client_protocol_i64: new Int64(TProtocolVersion.SPARK_CLI_SERVICE_PROTOCOL_V8), + ...getInitialNamespaceOptions(request.initialCatalog, request.initialSchema), + configuration, + canUseMultipleCatalogs: true, + }); + + Status.assert(response.status); + return new ThriftSessionBackend({ + handle: definedOrError(response.sessionHandle), + context: this.context, + serverProtocolVersion: response.serverProtocolVersion, + }); + } + + public async close(): Promise { + // DBSQLClient owns the connection lifecycle and clears its own state + // (connectionProvider, authProvider, thrift client) after this returns. 
+ } +} diff --git a/lib/thrift-backend/ThriftOperationBackend.ts b/lib/thrift-backend/ThriftOperationBackend.ts new file mode 100644 index 00000000..436d4928 --- /dev/null +++ b/lib/thrift-backend/ThriftOperationBackend.ts @@ -0,0 +1,382 @@ +import { stringify, NIL } from 'uuid'; +import { + TGetOperationStatusResp, + TOperationHandle, + TSparkDirectResults, + TGetResultSetMetadataResp, + TSparkRowSetType, + TCloseOperationResp, + TOperationState, +} from '../../thrift/TCLIService_types'; +import IOperationBackend from '../contracts/IOperationBackend'; +import IClientContext from '../contracts/IClientContext'; +import { WaitUntilReadyOptions } from '../contracts/IOperation'; +import { OperationStatus, OperationState } from '../contracts/OperationStatus'; +import { ResultMetadata, ResultFormat } from '../contracts/ResultMetadata'; +import Status from '../dto/Status'; +import { LogLevel } from '../contracts/IDBSQLLogger'; +import OperationStateError, { OperationStateErrorCode } from '../errors/OperationStateError'; +import IResultsProvider from '../result/IResultsProvider'; +import RowSetProvider from '../result/RowSetProvider'; +import JsonResultHandler from '../result/JsonResultHandler'; +import ArrowResultHandler from '../result/ArrowResultHandler'; +import CloudFetchResultHandler from '../result/CloudFetchResultHandler'; +import ArrowResultConverter from '../result/ArrowResultConverter'; +import ResultSlicer from '../result/ResultSlicer'; +import { definedOrError } from '../utils'; +import HiveDriverError from '../errors/HiveDriverError'; + +interface ThriftOperationBackendOptions { + handle: TOperationHandle; + directResults?: TSparkDirectResults; + context: IClientContext; +} + +async function delay(ms?: number): Promise { + return new Promise((resolve) => { + setTimeout(resolve, ms); + }); +} + +function thriftStateToOperationState(state: TOperationState | undefined | null): OperationState { + switch (state) { + case TOperationState.INITIALIZED_STATE: + case 
TOperationState.PENDING_STATE: + return OperationState.Pending; + case TOperationState.RUNNING_STATE: + return OperationState.Running; + case TOperationState.FINISHED_STATE: + return OperationState.Succeeded; + case TOperationState.CANCELED_STATE: + return OperationState.Cancelled; + case TOperationState.CLOSED_STATE: + return OperationState.Closed; + case TOperationState.ERROR_STATE: + case TOperationState.TIMEDOUT_STATE: + return OperationState.Failed; + case TOperationState.UKNOWN_STATE: + default: + return OperationState.Unknown; + } +} + +function thriftRowSetTypeToResultFormat(type: TSparkRowSetType): ResultFormat { + switch (type) { + case TSparkRowSetType.COLUMN_BASED_SET: + return ResultFormat.ColumnBased; + case TSparkRowSetType.ARROW_BASED_SET: + return ResultFormat.ArrowBased; + case TSparkRowSetType.URL_BASED_SET: + return ResultFormat.UrlBased; + default: + throw new HiveDriverError(`Unsupported result format: ${TSparkRowSetType[type]}`); + } +} + +export default class ThriftOperationBackend implements IOperationBackend { + private readonly context: IClientContext; + + private readonly operationHandle: TOperationHandle; + + private readonly _data: RowSetProvider; + + private readonly closeOperation?: TCloseOperationResp; + + private metadata?: TGetResultSetMetadataResp; + + private metadataPromise?: Promise; + + private state: TOperationState = TOperationState.INITIALIZED_STATE; + + private operationStatus?: TGetOperationStatusResp; + + private resultHandler?: ResultSlicer; + + constructor({ handle, directResults, context }: ThriftOperationBackendOptions) { + this.operationHandle = handle; + this.context = context; + + const useOnlyPrefetchedResults = Boolean(directResults?.closeOperation); + + if (directResults?.operationStatus) { + this.processOperationStatusResponse(directResults.operationStatus); + } + + this.metadata = directResults?.resultSetMetadata; + this._data = new RowSetProvider( + this.context, + this.operationHandle, + 
[directResults?.resultSet], + useOnlyPrefetchedResults, + ); + this.closeOperation = directResults?.closeOperation; + } + + public get id(): string { + const operationId = this.operationHandle?.operationId?.guid; + return operationId ? stringify(operationId) : NIL; + } + + public get hasResultSet(): boolean { + return Boolean(this.operationHandle.hasResultSet); + } + + public async fetchChunk({ + limit, + disableBuffering, + }: { + limit: number; + disableBuffering?: boolean; + }): Promise> { + const resultHandler = await this.getResultHandler(); + + // All the library code is Promise-based, however, since Promises are microtasks, + // enqueueing a lot of promises may block macrotasks execution for a while. + // Usually, there are no much microtasks scheduled, however, when fetching query + // results (especially CloudFetch ones) it's quite easy to block event loop for + // long enough to break a lot of things. For example, with CloudFetch, after first + // set of files are downloaded and being processed immediately one by one, event + // loop easily gets blocked for enough time to break connection pool. `http.Agent` + // stops receiving socket events, and marks all sockets invalid on the next attempt + // to use them. See these similar issues that helped to debug this particular case - + // https://github.com/nodejs/node/issues/47130 and https://github.com/node-fetch/node-fetch/issues/1735 + await new Promise((resolve) => { + setTimeout(resolve, 0); + }); + + return resultHandler.fetchNext({ limit, disableBuffering }); + } + + public async hasMore(): Promise { + const resultHandler = await this.getResultHandler(); + return resultHandler.hasMore(); + } + + public async status(progress: boolean): Promise { + const response = await this.thriftStatusResponse(progress); + return this.adaptOperationStatus(response); + } + + /** + * Thrift-specific accessor that returns the raw `TGetOperationStatusResp`. 
+ * + * Used internally to drive the Thrift state machine + attach the wire + * response to `OperationStateError`. Also called by the public + * `DBSQLOperation.status()` facade (zero-loss fast path) so existing user + * code that reads `taskStatus`, `numModifiedRows`, etc. continues to work + * verbatim against the Thrift backend. + * + * Not declared on `IOperationBackend` — non-Thrift backends do not + * implement it. The facade reaches it via `instanceof ThriftOperationBackend`. + */ + public async thriftStatusResponse(progress: boolean): Promise { + if (this.operationStatus) { + return this.operationStatus; + } + + const driver = await this.context.getDriver(); + const response = await driver.getOperationStatus({ + operationHandle: this.operationHandle, + getProgressUpdate: progress, + }); + + return this.processOperationStatusResponse(response); + } + + public async waitUntilReady(options?: WaitUntilReadyOptions): Promise { + if (this.state === TOperationState.FINISHED_STATE) { + return; + } + + let isReady = false; + + while (!isReady) { + // eslint-disable-next-line no-await-in-loop + const response = await this.thriftStatusResponse(Boolean(options?.progress)); + + if (options?.callback) { + // The public `OperationStatusCallback` is Thrift-shaped; pass the + // wire response verbatim. Non-Thrift backends synthesize via + // `synthesizeThriftStatus` in their own `waitUntilReady` impls. 
+ // eslint-disable-next-line no-await-in-loop + await Promise.resolve(options.callback(response)); + } + + switch (response.operationState) { + case TOperationState.INITIALIZED_STATE: + case TOperationState.PENDING_STATE: + case TOperationState.RUNNING_STATE: + break; + + case TOperationState.FINISHED_STATE: + isReady = true; + break; + + case TOperationState.CANCELED_STATE: + throw new OperationStateError(OperationStateErrorCode.Canceled, response); + + case TOperationState.CLOSED_STATE: + throw new OperationStateError(OperationStateErrorCode.Closed, response); + + case TOperationState.ERROR_STATE: + throw new OperationStateError(OperationStateErrorCode.Error, response); + case TOperationState.TIMEDOUT_STATE: + throw new OperationStateError(OperationStateErrorCode.Timeout, response); + case TOperationState.UKNOWN_STATE: + default: + throw new OperationStateError(OperationStateErrorCode.Unknown, response); + } + + if (!isReady) { + // eslint-disable-next-line no-await-in-loop + await delay(100); + } + } + } + + public async getResultMetadata(): Promise { + return this.adaptResultMetadata(await this.thriftResultMetadataResponse()); + } + + /** + * Thrift-specific accessor for the raw `TGetResultSetMetadataResp`. + * + * Used internally by `getResultHandler` (dispatches on Thrift `resultFormat` + * and passes the full Thrift response to the JSON / Arrow / CloudFetch + * result handlers). Also called by the public `DBSQLOperation.getMetadata()` + * facade (zero-loss fast path). + * + * Not declared on `IOperationBackend` — non-Thrift backends do not implement + * it. The facade reaches it via `instanceof ThriftOperationBackend`. 
+ */ + public async thriftResultMetadataResponse(): Promise { + if (this.metadata) { + return this.metadata; + } + + if (this.metadataPromise) { + return this.metadataPromise; + } + + this.metadataPromise = (async () => { + const driver = await this.context.getDriver(); + const metadata = await driver.getResultSetMetadata({ + operationHandle: this.operationHandle, + }); + Status.assert(metadata.status); + this.metadata = metadata; + return metadata; + })(); + + try { + return await this.metadataPromise; + } finally { + this.metadataPromise = undefined; + } + } + + public async cancel(): Promise { + this.context.getLogger().log(LogLevel.debug, `Cancelling operation with id: ${this.id}`); + const driver = await this.context.getDriver(); + const response = await driver.cancelOperation({ + operationHandle: this.operationHandle, + }); + Status.assert(response.status); + return new Status(response.status); + } + + public async close(): Promise { + this.context.getLogger().log(LogLevel.debug, `Closing operation with id: ${this.id}`); + const driver = await this.context.getDriver(); + const response = + this.closeOperation ?? 
+ (await driver.closeOperation({ + operationHandle: this.operationHandle, + })); + Status.assert(response.status); + return new Status(response.status); + } + + private async getResultHandler(): Promise> { + const metadata = await this.thriftResultMetadataResponse(); + const resultFormat = definedOrError(metadata.resultFormat); + + if (!this.resultHandler) { + let resultSource: IResultsProvider> | undefined; + + switch (resultFormat) { + case TSparkRowSetType.COLUMN_BASED_SET: + resultSource = new JsonResultHandler(this.context, this._data, metadata); + break; + case TSparkRowSetType.ARROW_BASED_SET: + resultSource = new ArrowResultConverter( + this.context, + new ArrowResultHandler(this.context, this._data, metadata), + metadata, + ); + break; + case TSparkRowSetType.URL_BASED_SET: + resultSource = new ArrowResultConverter( + this.context, + new CloudFetchResultHandler(this.context, this._data, metadata), + metadata, + ); + break; + // no default + } + + if (resultSource) { + this.resultHandler = new ResultSlicer(this.context, resultSource); + } + } + + if (!this.resultHandler) { + throw new HiveDriverError(`Unsupported result format: ${TSparkRowSetType[resultFormat]}`); + } + + return this.resultHandler; + } + + private processOperationStatusResponse(response: TGetOperationStatusResp) { + Status.assert(response.status); + + this.state = response.operationState ?? this.state; + + if (typeof response.hasResultSet === 'boolean') { + this.operationHandle.hasResultSet = response.hasResultSet; + } + + const isInProgress = [ + TOperationState.INITIALIZED_STATE, + TOperationState.PENDING_STATE, + TOperationState.RUNNING_STATE, + ].includes(this.state); + + if (!isInProgress) { + this.operationStatus = response; + } + + return response; + } + + private adaptOperationStatus(response: TGetOperationStatusResp): OperationStatus { + return { + state: thriftStateToOperationState(response.operationState), + hasResultSet: typeof response.hasResultSet === 'boolean' ? 
response.hasResultSet : undefined, + errorMessage: response.errorMessage ?? response.displayMessage ?? undefined, + sqlState: response.sqlState ?? undefined, + progressUpdateResponse: response.progressUpdateResponse, + }; + } + + // eslint-disable-next-line class-methods-use-this + private adaptResultMetadata(response: TGetResultSetMetadataResp): ResultMetadata { + return { + schema: response.schema, + resultFormat: thriftRowSetTypeToResultFormat(definedOrError(response.resultFormat)), + lz4Compressed: response.lz4Compressed, + arrowSchema: response.arrowSchema, + isStagingOperation: Boolean(response.isStagingOperation), + }; + } +} diff --git a/lib/thrift-backend/ThriftSessionBackend.ts b/lib/thrift-backend/ThriftSessionBackend.ts new file mode 100644 index 00000000..c103ab4f --- /dev/null +++ b/lib/thrift-backend/ThriftSessionBackend.ts @@ -0,0 +1,333 @@ +import { stringify, NIL } from 'uuid'; +import Int64 from 'node-int64'; +import { + TSessionHandle, + TStatus, + TOperationHandle, + TSparkDirectResults, + TSparkArrowTypes, + TSparkParameter, + TProtocolVersion, + TExecuteStatementReq, +} from '../../thrift/TCLIService_types'; +import ISessionBackend from '../contracts/ISessionBackend'; +import IOperationBackend from '../contracts/IOperationBackend'; +import IClientContext, { ClientConfig } from '../contracts/IClientContext'; +import { + ExecuteStatementOptions, + TypeInfoRequest, + CatalogsRequest, + SchemasRequest, + TablesRequest, + TableTypesRequest, + ColumnsRequest, + FunctionsRequest, + PrimaryKeysRequest, + CrossReferenceRequest, +} from '../contracts/IDBSQLSession'; +import Status from '../dto/Status'; +import InfoValue from '../dto/InfoValue'; +import { definedOrError, LZ4, ProtocolVersion, serializeQueryTags } from '../utils'; +import ParameterError from '../errors/ParameterError'; +import { DBSQLParameter, DBSQLParameterValue } from '../DBSQLParameter'; +import { LogLevel } from '../contracts/IDBSQLLogger'; +import ThriftOperationBackend from 
'./ThriftOperationBackend'; + +interface OperationResponseShape { + status: TStatus; + operationHandle?: TOperationHandle; + directResults?: TSparkDirectResults; +} + +export function numberToInt64(value: number | bigint | Int64): Int64 { + if (value instanceof Int64) { + return value; + } + + if (typeof value === 'bigint') { + const buffer = new ArrayBuffer(BigInt64Array.BYTES_PER_ELEMENT); + const view = new DataView(buffer); + view.setBigInt64(0, value, false); // `false` to use big-endian order + return new Int64(Buffer.from(buffer)); + } + + return new Int64(value); +} + +function getDirectResultsOptions(maxRows: number | bigint | Int64 | null | undefined, config: ClientConfig) { + if (maxRows === null) { + return {}; + } + + return { + getDirectResults: { + maxRows: numberToInt64(maxRows ?? config.directResultsDefaultMaxRows), + }, + }; +} + +function getArrowOptions( + config: ClientConfig, + serverProtocolVersion: TProtocolVersion | undefined | null, +): { + canReadArrowResult: boolean; + useArrowNativeTypes?: TSparkArrowTypes; +} { + const { arrowEnabled = true, useArrowNativeTypes = true } = config; + + if (!arrowEnabled || !ProtocolVersion.supportsArrowMetadata(serverProtocolVersion)) { + return { + canReadArrowResult: false, + }; + } + + return { + canReadArrowResult: true, + useArrowNativeTypes: { + timestampAsArrow: useArrowNativeTypes, + decimalAsArrow: useArrowNativeTypes, + complexTypesAsArrow: useArrowNativeTypes, + intervalTypesAsArrow: false, + }, + }; +} + +function getQueryParameters( + namedParameters?: Record, + ordinalParameters?: Array, +): Array { + const namedParametersProvided = namedParameters !== undefined && Object.keys(namedParameters).length > 0; + const ordinalParametersProvided = ordinalParameters !== undefined && ordinalParameters.length > 0; + + if (namedParametersProvided && ordinalParametersProvided) { + throw new ParameterError('Driver does not support both ordinal and named parameters.'); + } + + if 
(!namedParametersProvided && !ordinalParametersProvided) { + return []; + } + + const result: Array = []; + + if (namedParameters !== undefined) { + for (const name of Object.keys(namedParameters)) { + const value = namedParameters[name]; + const param = value instanceof DBSQLParameter ? value : new DBSQLParameter({ value }); + result.push(param.toSparkParameter({ name })); + } + } + + if (ordinalParameters !== undefined) { + for (const value of ordinalParameters) { + const param = value instanceof DBSQLParameter ? value : new DBSQLParameter({ value }); + result.push(param.toSparkParameter()); + } + } + + return result; +} + +interface ThriftSessionBackendOptions { + handle: TSessionHandle; + context: IClientContext; + serverProtocolVersion?: TProtocolVersion; +} + +export default class ThriftSessionBackend implements ISessionBackend { + private readonly context: IClientContext; + + private readonly sessionHandle: TSessionHandle; + + private readonly serverProtocolVersion?: TProtocolVersion; + + constructor({ handle, context, serverProtocolVersion }: ThriftSessionBackendOptions) { + this.sessionHandle = handle; + this.context = context; + this.serverProtocolVersion = serverProtocolVersion; + this.context.getLogger().log(LogLevel.debug, `Server protocol version: ${this.serverProtocolVersion}`); + } + + private getRunAsyncForMetadataOperations(): boolean | undefined { + return ProtocolVersion.supportsAsyncMetadataOperations(this.serverProtocolVersion) ? true : undefined; + } + + public get id(): string { + const sessionId = this.sessionHandle?.sessionId?.guid; + return sessionId ? 
stringify(sessionId) : NIL; + } + + public async getInfo(infoType: number): Promise { + const driver = await this.context.getDriver(); + const response = await driver.getInfo({ + sessionHandle: this.sessionHandle, + infoType, + }); + Status.assert(response.status); + return new InfoValue(response.infoValue); + } + + public async executeStatement(statement: string, options: ExecuteStatementOptions): Promise { + const driver = await this.context.getDriver(); + const clientConfig = this.context.getConfig(); + + const request = new TExecuteStatementReq({ + sessionHandle: this.sessionHandle, + statement, + queryTimeout: options.queryTimeout ? numberToInt64(options.queryTimeout) : undefined, + runAsync: true, + ...getDirectResultsOptions(options.maxRows, clientConfig), + ...getArrowOptions(clientConfig, this.serverProtocolVersion), + }); + + if (ProtocolVersion.supportsParameterizedQueries(this.serverProtocolVersion)) { + request.parameters = getQueryParameters(options.namedParameters, options.ordinalParameters); + } + + const serializedQueryTags = serializeQueryTags(options.queryTags); + if (serializedQueryTags !== undefined) { + request.confOverlay = { ...request.confOverlay, query_tags: serializedQueryTags }; + } + + if (ProtocolVersion.supportsCloudFetch(this.serverProtocolVersion)) { + request.canDownloadResult = options.useCloudFetch ?? clientConfig.useCloudFetch; + } + + if (ProtocolVersion.supportsArrowCompression(this.serverProtocolVersion) && request.canDownloadResult !== true) { + request.canDecompressLZ4Result = (options.useLZ4Compression ?? 
clientConfig.useLZ4Compression) && Boolean(LZ4()); + } + + const response = await driver.executeStatement(request); + return this.createOperationBackend(response); + } + + public async getTypeInfo(request: TypeInfoRequest): Promise { + const driver = await this.context.getDriver(); + const response = await driver.getTypeInfo({ + sessionHandle: this.sessionHandle, + runAsync: this.getRunAsyncForMetadataOperations(), + ...getDirectResultsOptions(request.maxRows, this.context.getConfig()), + }); + return this.createOperationBackend(response); + } + + public async getCatalogs(request: CatalogsRequest): Promise { + const driver = await this.context.getDriver(); + const response = await driver.getCatalogs({ + sessionHandle: this.sessionHandle, + runAsync: this.getRunAsyncForMetadataOperations(), + ...getDirectResultsOptions(request.maxRows, this.context.getConfig()), + }); + return this.createOperationBackend(response); + } + + public async getSchemas(request: SchemasRequest): Promise { + const driver = await this.context.getDriver(); + const response = await driver.getSchemas({ + sessionHandle: this.sessionHandle, + catalogName: request.catalogName, + schemaName: request.schemaName, + runAsync: this.getRunAsyncForMetadataOperations(), + ...getDirectResultsOptions(request.maxRows, this.context.getConfig()), + }); + return this.createOperationBackend(response); + } + + public async getTables(request: TablesRequest): Promise { + const driver = await this.context.getDriver(); + const response = await driver.getTables({ + sessionHandle: this.sessionHandle, + catalogName: request.catalogName, + schemaName: request.schemaName, + tableName: request.tableName, + tableTypes: request.tableTypes, + runAsync: this.getRunAsyncForMetadataOperations(), + ...getDirectResultsOptions(request.maxRows, this.context.getConfig()), + }); + return this.createOperationBackend(response); + } + + public async getTableTypes(request: TableTypesRequest): Promise { + const driver = await 
/**
 * Issues a column-metadata request for this session and wraps the
 * response in an operation backend.
 *
 * @param request Filters forwarded as-is (`catalogName`, `schemaName`,
 *   `tableName`, `columnName`) plus `maxRows`, which feeds the
 *   direct-results options.
 * @returns The backend built by `createOperationBackend` from the response.
 */
public async getColumns(request: ColumnsRequest): Promise<IOperationBackend> {
  const driver = await this.context.getDriver();
  const response = await driver.getColumns({
    sessionHandle: this.sessionHandle,
    catalogName: request.catalogName,
    schemaName: request.schemaName,
    tableName: request.tableName,
    columnName: request.columnName,
    runAsync: this.getRunAsyncForMetadataOperations(),
    ...getDirectResultsOptions(request.maxRows, this.context.getConfig()),
  });
  return this.createOperationBackend(response);
}
/**
 * Turns a driver response into an operation backend.
 *
 * The response status is checked first via `Status.assert`; only then is
 * the operation handle required via `definedOrError` (presumably throws
 * when the handle is undefined — confirm against `lib/utils`).
 *
 * @param response Driver response carrying `status`, `operationHandle`
 *   and optional `directResults`.
 * @returns A `ThriftOperationBackend` bound to this session's context.
 */
private createOperationBackend(response: OperationResponseShape): IOperationBackend {
  Status.assert(response.status);

  const { operationHandle, directResults } = response;
  return new ThriftOperationBackend({
    handle: definedOrError(operationHandle),
    directResults,
    context: this.context,
  });
}
/**
 * Maps the neutral `ResultFormat` onto the Thrift `TSparkRowSetType`.
 *
 * `ColumnBased` and any value not listed below both map to
 * `COLUMN_BASED_SET`, so only the two non-default formats need a check.
 */
function resultFormatToThrift(format: ResultFormat): TSparkRowSetType {
  if (format === ResultFormat.ArrowBased) {
    return TSparkRowSetType.ARROW_BASED_SET;
  }
  if (format === ResultFormat.UrlBased) {
    return TSparkRowSetType.URL_BASED_SET;
  }
  return TSparkRowSetType.COLUMN_BASED_SET;
}
/**
 * Builds a Thrift-shaped `TGetResultSetMetadataResp` from the neutral
 * `ResultMetadata` DTO.
 *
 * Only the fields copied below are populated; `status` is always the
 * synthetic value returned by `synthesizeOkStatus()`, and the result
 * format is translated through `resultFormatToThrift`.
 *
 * @param metadata Backend-neutral result metadata.
 * @returns An object asserted to be `TGetResultSetMetadataResp`.
 */
export function synthesizeThriftResultSetMetadata(metadata: ResultMetadata): TGetResultSetMetadataResp {
  const { schema, resultFormat, lz4Compressed, arrowSchema, isStagingOperation } = metadata;

  return {
    status: synthesizeOkStatus(),
    schema,
    resultFormat: resultFormatToThrift(resultFormat),
    lz4Compressed,
    arrowSchema,
    isStagingOperation,
  } as TGetResultSetMetadataResp;
}
+ Generated by napi-rs from the Rust source; checked in as the + consumer-facing type contract. +- `index.js` — napi-rs's per-platform router shim. Checked in; + regenerated by `npm run build:native`. In published + tarballs it ships alongside the `.d.ts` and `require()`s the + right `@databricks/sql-kernel-<triple>` optional dependency. +- `index.*.node` — the actual native binary, one per platform. + Gitignored. In production these live in the per-triple optional + dependencies (`@databricks/sql-kernel-linux-x64-gnu`, etc.); for + local dev `npm run build:native` copies one into this directory. + +## Build for local dev + +```bash +# From the nodejs repo root: +export DATABRICKS_SQL_KERNEL_REPO=/path/to/your/databricks-sql-kernel +npm run build:native # release build (default) +BUILD_PROFILE= npm run build:native # debug build (empty BUILD_PROFILE drops --release) +``` + +`DATABRICKS_SQL_KERNEL_REPO` points at the kernel repo root (the +directory containing `napi/`) and is required when your kernel +checkout isn't at `../../databricks-sql-kernel` relative to the +nodejs repo. + +## Production load path + +At release time the kernel's CI publishes +`@databricks/sql-kernel-<triple>` npm packages — one per supported +platform — each containing a single `.node` binary. The nodejs +driver lists them as `optionalDependencies`; npm installs only the +one matching the consumer's `process.platform` / `process.arch`. +`native/sea/index.js` (the napi-rs router) then `require()`s the +installed package at load time. + +## Supported platforms (M0) + +M0 publishes a **single** triple: **`linux-x64-gnu`** (package +`@databricks/sql-kernel-linux-x64-gnu`). It is the only entry in the +driver's `optionalDependencies`. 
/**
 * JS-visible options for opening a Databricks SQL session over PAT.
 * `token` is required.
 *
 * Catalog / schema / sessionConf are applied once at session creation
 * and remain in effect for every statement run on the resulting
 * `Connection`. The SEA wire protocol carries them on
 * `CreateSession`, not on `ExecuteStatement` — so there is no
 * per-statement override path on this binding.
 */
export interface ConnectionOptions {
  /**
   * Workspace host, e.g. `adb-…azuredatabricks.net`. The kernel
   * normalises this — bare hostnames get `https://` prepended.
   */
  hostName: string
  /**
   * JDBC-style HTTP path, e.g. `/sql/1.0/warehouses/abc123`. The
   * kernel parses out the warehouse id.
   */
  httpPath: string
  /**
   * Personal access token. Must be non-empty (the kernel rejects
   * empty PATs at session construction).
   */
  token: string
  /**
   * Default catalog for statements executed on this session.
   * Routed through the kernel's `DefaultOpts` and onto the SEA
   * `CreateSession.catalog` wire field.
   */
  catalog?: string
  /**
   * Default schema for statements executed on this session.
   * Routed through the kernel's `DefaultOpts` and onto the SEA
   * `CreateSession.schema` wire field.
   */
  schema?: string
  /**
   * Server-bound session conf (Spark conf, `ANSI_MODE`, `TIMEZONE`,
   * query-tag presets, …). Forwarded verbatim to SEA
   * `session_confs`. Unknown keys are rejected server-side.
   *
   * String keys to string values.
   */
  sessionConf?: Record<string, string>
  /**
   * Maximum number of pooled HTTP connections per host. Routes
   * through the kernel's [`HttpConfig::pool_max_idle_per_host`].
   * Tunes the underlying `reqwest` connection pool — higher values
   * reduce reconnect overhead when many statements run
   * concurrently against the same warehouse.
   *
   * When the JS caller does NOT provide `maxConnections`, the napi
   * binding applies a NodeJS-driver-appropriate default of
   * [`NAPI_DEFAULT_POOL_MAX_IDLE_PER_HOST`] (100) — chosen to match
   * the JDBC driver's `HttpConnectionPoolSize` default and to close
   * the throughput gap vs the NodeJS Thrift driver's
   * `maxSockets: Infinity` pool for bursty workloads. The kernel
   * core's [`HttpConfig::pool_max_idle_per_host`] default remains
   * at the conservative kernel value (10); each binding chooses
   * its own user-facing default. Mirrors the Python connector's
   * `max_connections` kwarg on the SEA backend, which exposes the
   * knob but keeps its own urllib3-aligned default of 10.
   *
   * Napi-rs serialises `u32` as JS `number`; values up to
   * `2^32 - 1` round-trip safely (any reasonable pool size fits).
   */
  maxConnections?: number
}
/**
 * Open a Databricks SQL session over PAT auth and return an opaque
 * `Connection` wrapping the kernel `Session`.
 *
 * The JS-visible name is `openSession` (napi-rs converts snake_case
 * to camelCase for free functions).
 *
 * @param options Host, HTTP path, token and optional session defaults.
 * @returns A promise resolving to the opened `Connection`.
 */
export declare function openSession(options: ConnectionOptions): Promise<Connection>
/**
 * Opaque connection handle wrapping a kernel `Session`.
 *
 * `inner` is `Arc<Mutex<Option<Session>>>` so:
 * - the Drop impl can clone the `Arc` and `.take()` the session on a
 *   background tokio task without holding `&mut self` (which Drop is
 *   forbidden from doing across an `await`),
 * - `close()` can `.take()` the session to consume it for the kernel's
 *   move-by-value `Session::close(self)` signature.
 *
 * **Current concurrency shape** — `executeStatement` holds
 * `inner.lock()` across `stmt.execute().await`, so two concurrent
 * `Promise.all([executeStatement(q1), executeStatement(q2)])` calls
 * on the same Connection serialise even though the kernel transport
 * supports concurrent statements per session, and `close()` blocks
 * behind any in-flight execute. The kernel's `Session::statement()`
 * is `&self`-callable, so the right shape is `Arc<Session>` with
 * concurrent execute paths; that lands in the follow-up lock-shape
 * refactor — see
 * `sea-workflow/jira-candidates/2026-05-24-napi-cancel-during-fetch.md`.
 */
export declare class Connection {
  /**
   * Server-issued session id. Cached at construction; readable
   * even after `close()` so JS-side log lines can correlate
   * against kernel / server logs which key on the same id.
   */
  get sessionId(): string
  /**
   * Execute a SQL statement and return a Statement handle that
   * streams batches via `fetchNextBatch()`.
   *
   * No per-statement options: catalog / schema / sessionConf are
   * session-level (`openSession`).
   */
  executeStatement(sql: string): Promise<Statement>
  /**
   * Explicit close. Awaits the server-side `DeleteSession` so the
   * JS caller can observe failures (auth revoked mid-session,
   * warehouse stopped, network error). Idempotent — a second call
   * on an already-closed connection returns `Ok`.
   *
   * **Errors are terminal from the JS side.** The kernel session
   * handle is consumed (`take()`) BEFORE the wire `DeleteSession`
   * runs, because `Session::close` takes `self` by value. On `Err`,
   * the napi `inner` is already `None`, so a JS-side retry sees a
   * closed connection and returns `Ok(())` without re-attempting
   * the wire call. The kernel's own `Drop` fire-and-forget retry
   * runs once in the background — the JS caller can log the error
   * but cannot drive a retry. If you need retry-on-failure
   * semantics for `DeleteSession`, layer them above this method.
   */
  close(): Promise<void>
}
/**
 * Opaque executed-statement handle.
 *
 * **Current concurrency shape** — every method takes `inner.lock()`
 * and holds the guard across the kernel `.await`. tokio `Mutex` is
 * FIFO, so cancel/close queue behind any in-flight `fetchNextBatch`
 * until it returns naturally. This is a known limitation; the
 * lock-shape refactor lands in a follow-up PR — see
 * `sea-workflow/jira-candidates/2026-05-24-napi-cancel-during-fetch.md`.
 *
 * `schema` and `statement_id` are cached at construction so they
 * survive `close()` — JS callers building error reports against a
 * disposed statement can still read them.
 */
export declare class Statement {
  /**
   * Server-issued statement id. Cached at construction; readable
   * even after `close()` so JS-side log lines can correlate against
   * kernel / server logs which key on the same id.
   */
  get statementId(): string
  /**
   * Number of rows modified by the statement (UPDATE / INSERT /
   * DELETE / MERGE). `null` for SELECT and on warehouses that don't
   * surface the counter. Mirrors Thrift's
   * `TGetOperationStatusResp.numModifiedRows`.
   */
  numModifiedRows(): Promise<number | null>
  /**
   * Server-supplied user-facing message. Mirrors Thrift's
   * `TGetOperationStatusResp.displayMessage`. **PII / sensitive-
   * data note:** may contain SQL fragments or parameter values —
   * redact before centralised logging.
   *
   * Populated on `Succeeded` / `Closed-with-inline-data` paths.
   * On terminal-error states (`Failed` / `Cancelled` /
   * `Closed-no-data`) the kernel returns an Error instead of a
   * `Statement`, and the same field rides on the JS Error envelope
   * under the same `displayMessage` key.
   */
  displayMessage(): Promise<string | null>
  /**
   * Server-supplied diagnostic detail — multi-line operator /
   * stack context. Mirrors Thrift's
   * `TGetOperationStatusResp.diagnosticInfo`. For support surfaces,
   * not user-facing. Same reachability + PII caveats as
   * `displayMessage`.
   */
  diagnosticInfo(): Promise<string | null>
  /**
   * Server-supplied JSON blob with extended error details. Mirrors
   * Thrift's `TGetOperationStatusResp.errorDetailsJson`.
   * Pass-through string — JS callers parse with `JSON.parse` if
   * they need structured access.
   *
   * **Server-side gating:** populated only when the workspace has
   * `spark.databricks.sql.errorDetailsJson.enabled = true` on the
   * underlying SQL cluster. The flag is internal-only / default-
   * false in the Databricks runtime, so for most JS callers this
   * will return `null`. Admin-enabled workspaces return content
   * shaped like `{"errorClass": "...", "messageTemplate": "..."}`.
   *
   * **Unbounded:** when populated, server can return a multi-MB
   * blob; size before logging.
   */
  errorDetailsJson(): Promise<string | null>
  /**
   * Pull the next batch of results. Returns `null` when the stream
   * is exhausted. The returned `ArrowBatch.ipcBytes` is a complete
   * Arrow IPC stream (schema header + 1 record-batch message)
   * suitable for handing to `apache-arrow`'s `RecordBatchReader`.
   *
   * On `Err`, the stream is in an unspecified state — call
   * `close()` and discard the `Statement`. Subsequent
   * `fetchNextBatch()` calls after an error are not guaranteed to
   * succeed or fail consistently.
   */
  fetchNextBatch(): Promise<ArrowBatch | null>
  /**
   * Result schema as an Arrow IPC payload (schema header only, no
   * record-batch message). Available before any batches have been
   * fetched, and remains available after `close()` — the kernel
   * materialises the schema eagerly so JS callers can build error
   * reports against a disposed statement.
   *
   * Sync because the body has no `.await` — `encode_ipc_stream` is
   * pure CPU work over a schema already cached on the wrapper.
   * Mirrors `pyo3/src/statement.rs::arrow_schema` (sync).
   * napi-rs converts a panic in a sync `#[napi]` entry point into a
   * thrown JS error via its own macro-expanded boundary, so the
   * `util::guarded` `catch_unwind` wrapper that the `async fn`
   * entry points use is not required for this method.
   */
  schema(): ArrowSchema
  /**
   * Server-side cancel.
   *
   * Short-circuits to `Ok(())` if `fetchNextBatch` has already
   * returned `null` (stream naturally exhausted) — matches the
   * JDBC `Statement.cancel()` no-op-after-completion contract, so
   * JS callers can fire cancel defensively without distinguishing
   * "real cancel" from "raced with natural completion."
   *
   * Returns `KernelError(InvalidStatementHandle)` if the statement
   * has been explicitly `close()`d.
   */
  cancel(): Promise<void>
  /**
   * Explicit close. Awaits the server-side `CloseStatement` so the
   * JS caller can observe failures (auth revoked mid-session,
   * network error, server-side error). Idempotent — a second call
   * on an already-closed statement returns `Ok`.
   *
   * **Errors are terminal from the JS side.** The kernel executed
   * handle is taken out of `inner` BEFORE the wire `CloseStatement`
   * runs (so `Drop` knows there's nothing left to clean up). On
   * `Err`, the napi `inner` is already `None`, so a JS-side retry
   * sees a closed statement and returns `Ok(())` without re-
   * attempting the wire call. The kernel-level `ExecutedStatement`
   * has been consumed at that point and the value is dropped on
   * the way out of the closure — the kernel's `ExecutedStatement::
   * Drop` then fires-and-forgets a single retry on the captured
   * runtime. The JS caller can log the error but cannot drive a
   * further retry. If you need retry-on-failure semantics for
   * `CloseStatement`, layer them above this method.
   */
  close(): Promise<void>
}
loadError = e + } + break + case 'ia32': + localFileExisted = existsSync( + join(__dirname, 'index.win32-ia32-msvc.node') + ) + try { + if (localFileExisted) { + nativeBinding = require('./index.win32-ia32-msvc.node') + } else { + nativeBinding = require('@databricks/sql-kernel-win32-ia32-msvc') + } + } catch (e) { + loadError = e + } + break + case 'arm64': + localFileExisted = existsSync( + join(__dirname, 'index.win32-arm64-msvc.node') + ) + try { + if (localFileExisted) { + nativeBinding = require('./index.win32-arm64-msvc.node') + } else { + nativeBinding = require('@databricks/sql-kernel-win32-arm64-msvc') + } + } catch (e) { + loadError = e + } + break + default: + throw new Error(`Unsupported architecture on Windows: ${arch}`) + } + break + case 'darwin': + localFileExisted = existsSync(join(__dirname, 'index.darwin-universal.node')) + try { + if (localFileExisted) { + nativeBinding = require('./index.darwin-universal.node') + } else { + nativeBinding = require('@databricks/sql-kernel-darwin-universal') + } + break + } catch {} + switch (arch) { + case 'x64': + localFileExisted = existsSync(join(__dirname, 'index.darwin-x64.node')) + try { + if (localFileExisted) { + nativeBinding = require('./index.darwin-x64.node') + } else { + nativeBinding = require('@databricks/sql-kernel-darwin-x64') + } + } catch (e) { + loadError = e + } + break + case 'arm64': + localFileExisted = existsSync( + join(__dirname, 'index.darwin-arm64.node') + ) + try { + if (localFileExisted) { + nativeBinding = require('./index.darwin-arm64.node') + } else { + nativeBinding = require('@databricks/sql-kernel-darwin-arm64') + } + } catch (e) { + loadError = e + } + break + default: + throw new Error(`Unsupported architecture on macOS: ${arch}`) + } + break + case 'freebsd': + if (arch !== 'x64') { + throw new Error(`Unsupported architecture on FreeBSD: ${arch}`) + } + localFileExisted = existsSync(join(__dirname, 'index.freebsd-x64.node')) + try { + if (localFileExisted) { + 
nativeBinding = require('./index.freebsd-x64.node') + } else { + nativeBinding = require('@databricks/sql-kernel-freebsd-x64') + } + } catch (e) { + loadError = e + } + break + case 'linux': + switch (arch) { + case 'x64': + if (isMusl()) { + localFileExisted = existsSync( + join(__dirname, 'index.linux-x64-musl.node') + ) + try { + if (localFileExisted) { + nativeBinding = require('./index.linux-x64-musl.node') + } else { + nativeBinding = require('@databricks/sql-kernel-linux-x64-musl') + } + } catch (e) { + loadError = e + } + } else { + localFileExisted = existsSync( + join(__dirname, 'index.linux-x64-gnu.node') + ) + try { + if (localFileExisted) { + nativeBinding = require('./index.linux-x64-gnu.node') + } else { + nativeBinding = require('@databricks/sql-kernel-linux-x64-gnu') + } + } catch (e) { + loadError = e + } + } + break + case 'arm64': + if (isMusl()) { + localFileExisted = existsSync( + join(__dirname, 'index.linux-arm64-musl.node') + ) + try { + if (localFileExisted) { + nativeBinding = require('./index.linux-arm64-musl.node') + } else { + nativeBinding = require('@databricks/sql-kernel-linux-arm64-musl') + } + } catch (e) { + loadError = e + } + } else { + localFileExisted = existsSync( + join(__dirname, 'index.linux-arm64-gnu.node') + ) + try { + if (localFileExisted) { + nativeBinding = require('./index.linux-arm64-gnu.node') + } else { + nativeBinding = require('@databricks/sql-kernel-linux-arm64-gnu') + } + } catch (e) { + loadError = e + } + } + break + case 'arm': + if (isMusl()) { + localFileExisted = existsSync( + join(__dirname, 'index.linux-arm-musleabihf.node') + ) + try { + if (localFileExisted) { + nativeBinding = require('./index.linux-arm-musleabihf.node') + } else { + nativeBinding = require('@databricks/sql-kernel-linux-arm-musleabihf') + } + } catch (e) { + loadError = e + } + } else { + localFileExisted = existsSync( + join(__dirname, 'index.linux-arm-gnueabihf.node') + ) + try { + if (localFileExisted) { + nativeBinding = 
// No platform/arch branch produced a binding. Surface the error captured by
// the branch's `catch` (stored in `loadError`) when there is one; otherwise
// fall back to a generic failure.
if (!nativeBinding) {
  if (loadError) {
    throw loadError
  }
  throw new Error(`Failed to load native binding`)
}

// Re-export the four symbols of the native module under the same names.
const { Connection, openSession, Statement, version } = nativeBinding

module.exports.Connection = Connection
module.exports.openSession = openSession
module.exports.Statement = Statement
module.exports.version = version
"build": "npm run update-version && tsc --project tsconfig.build.json", + "build:native": "bash -c 'cd ${DATABRICKS_SQL_KERNEL_REPO:-../../databricks-sql-kernel}/napi && npx --no-install @napi-rs/cli build --platform ${BUILD_PROFILE:---release} && cp index.* $OLDPWD/native/sea/'", + "prepack": "test -f native/sea/index.js || { echo 'ERROR: native/sea/index.js (napi-rs router) is missing — the published tarball would fail to load SEA. It is committed to git; run `npm run build:native` if you removed it.' >&2; exit 1; }", "watch": "tsc --project tsconfig.build.json --watch", "type-check": "tsc --noEmit", "prettier": "prettier . --check", @@ -47,6 +49,7 @@ ], "license": "Apache 2.0", "devDependencies": { + "@napi-rs/cli": "2.18.4", "@types/chai": "^4.3.14", "@types/http-proxy": "^1.17.14", "@types/lz4": "^0.6.4", @@ -89,6 +92,7 @@ "winston": "^3.8.2" }, "optionalDependencies": { - "lz4": "^0.6.5" + "lz4": "^0.6.5", + "@databricks/sql-kernel-linux-x64-gnu": "0.1.0" } } diff --git a/tests/e2e/sea/e2e-smoke.test.ts b/tests/e2e/sea/e2e-smoke.test.ts new file mode 100644 index 00000000..e96efe34 --- /dev/null +++ b/tests/e2e/sea/e2e-smoke.test.ts @@ -0,0 +1,94 @@ +// Copyright (c) 2026 Databricks, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +import { expect } from 'chai'; +import { tableFromIPC } from 'apache-arrow'; +import { tryGetSeaNative, SeaConnection, SeaStatement } from '../../../lib/sea/SeaNativeLoader'; +import config from '../utils/config'; + +// End-to-end smoke test against a live warehouse: +// 1. Open a kernel `Session` over PAT. +// 2. Execute `SELECT 1`, decode the IPC payload, assert the value is 1. +// 3. Exercise lifecycle negative paths (drain-past-null, double-close). +// 4. Close the statement, then the connection. +// +// Credentials come from the shared e2e config (tests/e2e/utils/config.ts: +// E2E_HOST / E2E_PATH / E2E_ACCESS_TOKEN) — the single credential source +// used by every other e2e test, so `npm run e2e` has one consistent +// skip/fail contract rather than two. + +describe('SEA native binding — end-to-end smoke', function smoke() { + // Live-warehouse tests can take >2s through warm-up. + this.timeout(60_000); + + const binding = tryGetSeaNative(); + if (binding === undefined) { + // Optional dependency absent on this platform — never reach the live path. + it.skip('SEA native binding not available on this platform'); + return; + } + + const { host: hostName, path: httpPath, token } = config; + + it('opens a session, runs SELECT 1, decodes the IPC payload to 1', async () => { + const connection: SeaConnection = await binding.openSession({ hostName, httpPath, token }); + expect(connection).to.be.an('object'); + + let statement: SeaStatement | null = null; + try { + statement = await connection.executeStatement('SELECT 1'); + expect(statement).to.be.an('object'); + + const batch = await statement.fetchNextBatch(); + expect(batch).to.not.equal(null); + expect(batch!.ipcBytes).to.be.instanceOf(Buffer); + expect(batch!.ipcBytes.length).to.be.greaterThan(0); + + // Decode the IPC payload and verify the value, not just the shape. 
+ const table = tableFromIPC(batch!.ipcBytes); + expect(table.numRows).to.equal(1); + expect(Number(table.getChildAt(0)!.get(0))).to.equal(1); + + // Drain-past-null: subsequent fetch returns null. + const after = await statement.fetchNextBatch(); + expect(after).to.equal(null); + + // Drain-past-drained: another fetch still returns null (idempotent). + const afterAgain = await statement.fetchNextBatch(); + expect(afterAgain).to.equal(null); + } finally { + if (statement !== null) { + await statement.close(); + } + await connection.close(); + } + }); + + it('returns a schema IPC payload before any batch is fetched', async () => { + const connection: SeaConnection = await binding.openSession({ hostName, httpPath, token }); + try { + const statement = await connection.executeStatement('SELECT 1'); + try { + // schema() is synchronous on the binding (cached at construction). + const schema = statement.schema(); + expect(schema.ipcBytes).to.be.instanceOf(Buffer); + expect(schema.ipcBytes.length).to.be.greaterThan(0); + } finally { + await statement.close(); + } + } finally { + await connection.close(); + } + }); +}); diff --git a/tests/unit/.stubs/OperationStub.ts b/tests/unit/.stubs/OperationStub.ts index cd827141..1dcac5ca 100644 --- a/tests/unit/.stubs/OperationStub.ts +++ b/tests/unit/.stubs/OperationStub.ts @@ -54,6 +54,10 @@ export default class OperationStub implements IOperation { return Promise.reject(new Error('Not implemented')); } + public async getResultMetadata() { + return Promise.reject(new Error('Not implemented')); + } + public iterateChunks(options?: IteratorOptions): IOperationChunksIterator { return new OperationChunksIterator(this, options); } diff --git a/tests/unit/.stubs/createOperationForTest.ts b/tests/unit/.stubs/createOperationForTest.ts new file mode 100644 index 00000000..563ad016 --- /dev/null +++ b/tests/unit/.stubs/createOperationForTest.ts @@ -0,0 +1,25 @@ +import { TOperationHandle, TSparkDirectResults } from 
'../../../thrift/TCLIService_types'; +import DBSQLOperation from '../../../lib/DBSQLOperation'; +import ThriftOperationBackend from '../../../lib/thrift-backend/ThriftOperationBackend'; +import IClientContext from '../../../lib/contracts/IClientContext'; + +interface CreateOperationForTestArgs { + handle: TOperationHandle; + directResults?: TSparkDirectResults; + context: IClientContext; +} + +/** + * Test helper that mirrors the pre-PR-378 `new DBSQLOperation({ handle, ... })` + * legacy ctor shape, but routes through the post-PR-378 `{ backend, ... }` + * shape by constructing a `ThriftOperationBackend` explicitly. Keeps the + * facade decoupled from concrete backend imports. + */ +export function createOperationForTest({ + handle, + directResults, + context, +}: CreateOperationForTestArgs): DBSQLOperation { + const backend = new ThriftOperationBackend({ handle, directResults, context }); + return new DBSQLOperation({ backend, context }); +} diff --git a/tests/unit/.stubs/createSessionForTest.ts b/tests/unit/.stubs/createSessionForTest.ts new file mode 100644 index 00000000..145c438e --- /dev/null +++ b/tests/unit/.stubs/createSessionForTest.ts @@ -0,0 +1,21 @@ +import { TSessionHandle, TProtocolVersion } from '../../../thrift/TCLIService_types'; +import DBSQLSession from '../../../lib/DBSQLSession'; +import ThriftSessionBackend from '../../../lib/thrift-backend/ThriftSessionBackend'; +import IClientContext from '../../../lib/contracts/IClientContext'; + +interface CreateSessionForTestArgs { + handle: TSessionHandle; + context: IClientContext; + serverProtocolVersion?: TProtocolVersion; +} + +/** + * Test helper that mirrors the pre-PR-378 `new DBSQLSession({ handle, ... })` + * legacy ctor shape, but routes through the post-PR-378 `{ backend, ... }` + * shape by constructing a `ThriftSessionBackend` explicitly. Keeps the + * facade decoupled from concrete backend imports. 
+ */ +export function createSessionForTest({ handle, context, serverProtocolVersion }: CreateSessionForTestArgs): DBSQLSession { + const backend = new ThriftSessionBackend({ handle, context, serverProtocolVersion }); + return new DBSQLSession({ backend, context }); +} diff --git a/tests/unit/DBSQLClient.test.ts b/tests/unit/DBSQLClient.test.ts index 4c0a3a34..8c3e64ce 100644 --- a/tests/unit/DBSQLClient.test.ts +++ b/tests/unit/DBSQLClient.test.ts @@ -2,6 +2,7 @@ import { expect, AssertionError } from 'chai'; import sinon from 'sinon'; import DBSQLClient, { ThriftLibrary } from '../../lib/DBSQLClient'; import DBSQLSession from '../../lib/DBSQLSession'; +import ThriftBackend from '../../lib/thrift-backend/ThriftBackend'; import PlainHttpAuthentication from '../../lib/connection/auth/PlainHttpAuthentication'; import DatabricksOAuth from '../../lib/connection/auth/DatabricksOAuth'; @@ -25,6 +26,19 @@ const connectOptions = { token: 'dapi********************************', } satisfies ConnectionOptions; +// Test helper: build a DBSQLClient with `getClient` stubbed to return the given +// ThriftClient stub, and pre-seed `client['backend']` with a ThriftBackend. +// Used to avoid 12 copies of the same 4-line setup across the openSession tests. 
+function makeStubbedClient(thriftClient: ThriftClientStub = new ThriftClientStub()): { + client: DBSQLClient; + thriftClient: ThriftClientStub; +} { + const client = new DBSQLClient(); + sinon.stub(client, 'getClient').returns(Promise.resolve(thriftClient)); + client['backend'] = new ThriftBackend({ context: client, onConnectionEvent: () => {} }); + return { client, thriftClient }; +} + describe('DBSQLClient.connect', () => { it('should prepend "/" to path if it is missing', async () => { const client = new DBSQLClient(); @@ -103,18 +117,14 @@ describe('DBSQLClient.connect', () => { describe('DBSQLClient.openSession', () => { it('should successfully open session', async () => { - const client = new DBSQLClient(); - const thriftClient = new ThriftClientStub(); - sinon.stub(client, 'getClient').returns(Promise.resolve(thriftClient)); + const { client } = makeStubbedClient(); const session = await client.openSession(); expect(session).instanceOf(DBSQLSession); }); it('should use initial namespace options', async () => { - const client = new DBSQLClient(); - const thriftClient = new ThriftClientStub(); - sinon.stub(client, 'getClient').returns(Promise.resolve(thriftClient)); + const { client, thriftClient } = makeStubbedClient(); case1: { const initialCatalog = 'catalog1'; @@ -144,6 +154,7 @@ describe('DBSQLClient.openSession', () => { it('should throw an exception when not connected', async () => { const client = new DBSQLClient(); + client['backend'] = undefined; client['connectionProvider'] = undefined; try { @@ -158,15 +169,13 @@ describe('DBSQLClient.openSession', () => { }); it('should correctly pass server protocol version to session', async () => { - const client = new DBSQLClient(); - const thriftClient = new ThriftClientStub(); - sinon.stub(client, 'getClient').returns(Promise.resolve(thriftClient)); + const { client, thriftClient } = makeStubbedClient(); // Test with default protocol version (SPARK_CLI_SERVICE_PROTOCOL_V8) { const session = await 
client.openSession(); expect(session).instanceOf(DBSQLSession); - expect((session as DBSQLSession)['serverProtocolVersion']).to.equal( + expect(((session as DBSQLSession)['backend'] as any)['serverProtocolVersion']).to.equal( TProtocolVersion.SPARK_CLI_SERVICE_PROTOCOL_V8, ); } @@ -179,16 +188,14 @@ describe('DBSQLClient.openSession', () => { const session = await client.openSession(); expect(session).instanceOf(DBSQLSession); - expect((session as DBSQLSession)['serverProtocolVersion']).to.equal( + expect(((session as DBSQLSession)['backend'] as any)['serverProtocolVersion']).to.equal( TProtocolVersion.SPARK_CLI_SERVICE_PROTOCOL_V7, ); } }); it('should pass session configuration to OpenSessionReq', async () => { - const client = new DBSQLClient(); - const thriftClient = new ThriftClientStub(); - sinon.stub(client, 'getClient').returns(Promise.resolve(thriftClient)); + const { client, thriftClient } = makeStubbedClient(); const configuration = { QUERY_TAGS: 'team:engineering', ansi_mode: 'true' }; await client.openSession({ configuration }); @@ -196,9 +203,7 @@ describe('DBSQLClient.openSession', () => { }); it('should affect session behavior based on protocol version', async () => { - const client = new DBSQLClient(); - const thriftClient = new ThriftClientStub(); - sinon.stub(client, 'getClient').returns(Promise.resolve(thriftClient)); + const { client, thriftClient } = makeStubbedClient(); // With protocol version V6 - should support async metadata operations { @@ -360,6 +365,7 @@ describe('DBSQLClient.close', () => { client['client'] = thriftClient; client['connectionProvider'] = new ConnectionProviderStub(); client['authProvider'] = new AuthProviderStub(); + client['backend'] = new ThriftBackend({ context: client, onConnectionEvent: () => {} }); const session = await client.openSession(); if (!(session instanceof DBSQLSession)) { @@ -583,9 +589,7 @@ describe('DBSQLClient.enableMetricViewMetadata', () => { }); it('should inject session parameter when 
enableMetricViewMetadata is true', async () => { - const client = new DBSQLClient(); - const thriftClient = new ThriftClientStub(); - sinon.stub(client, 'getClient').returns(Promise.resolve(thriftClient)); + const { client, thriftClient } = makeStubbedClient(); await client.connect({ ...connectOptions, enableMetricViewMetadata: true }); await client.openSession(); @@ -597,9 +601,7 @@ describe('DBSQLClient.enableMetricViewMetadata', () => { }); it('should not inject session parameter when enableMetricViewMetadata is false', async () => { - const client = new DBSQLClient(); - const thriftClient = new ThriftClientStub(); - sinon.stub(client, 'getClient').returns(Promise.resolve(thriftClient)); + const { client, thriftClient } = makeStubbedClient(); await client.connect({ ...connectOptions, enableMetricViewMetadata: false }); await client.openSession(); @@ -610,9 +612,7 @@ describe('DBSQLClient.enableMetricViewMetadata', () => { }); it('should not inject session parameter when enableMetricViewMetadata is not set', async () => { - const client = new DBSQLClient(); - const thriftClient = new ThriftClientStub(); - sinon.stub(client, 'getClient').returns(Promise.resolve(thriftClient)); + const { client, thriftClient } = makeStubbedClient(); await client.connect(connectOptions); await client.openSession(); @@ -623,9 +623,7 @@ describe('DBSQLClient.enableMetricViewMetadata', () => { }); it('should preserve user-provided session configuration', async () => { - const client = new DBSQLClient(); - const thriftClient = new ThriftClientStub(); - sinon.stub(client, 'getClient').returns(Promise.resolve(thriftClient)); + const { client, thriftClient } = makeStubbedClient(); await client.connect({ ...connectOptions, enableMetricViewMetadata: true }); const userConfig = { QUERY_TAGS: 'team:engineering', ansi_mode: 'true' }; @@ -638,9 +636,7 @@ describe('DBSQLClient.enableMetricViewMetadata', () => { }); it('should serialize queryTags dict and set in session configuration', async () => 
{ - const client = new DBSQLClient(); - const thriftClient = new ThriftClientStub(); - sinon.stub(client, 'getClient').returns(Promise.resolve(thriftClient)); + const { client, thriftClient } = makeStubbedClient(); await client.openSession({ queryTags: { team: 'data-eng', project: 'etl' }, @@ -652,9 +648,7 @@ describe('DBSQLClient.enableMetricViewMetadata', () => { }); it('should let queryTags take precedence over configuration.QUERY_TAGS', async () => { - const client = new DBSQLClient(); - const thriftClient = new ThriftClientStub(); - sinon.stub(client, 'getClient').returns(Promise.resolve(thriftClient)); + const { client, thriftClient } = makeStubbedClient(); await client.openSession({ queryTags: { team: 'new-team' }, @@ -668,9 +662,7 @@ describe('DBSQLClient.enableMetricViewMetadata', () => { }); it('should remove QUERY_TAGS from configuration when queryTags is empty', async () => { - const client = new DBSQLClient(); - const thriftClient = new ThriftClientStub(); - sinon.stub(client, 'getClient').returns(Promise.resolve(thriftClient)); + const { client, thriftClient } = makeStubbedClient(); await client.openSession({ queryTags: {}, diff --git a/tests/unit/DBSQLOperation.test.ts b/tests/unit/DBSQLOperation.test.ts index b5f142ba..1e670c46 100644 --- a/tests/unit/DBSQLOperation.test.ts +++ b/tests/unit/DBSQLOperation.test.ts @@ -21,6 +21,7 @@ import CloudFetchResultHandler from '../../lib/result/CloudFetchResultHandler'; import ResultSlicer from '../../lib/result/ResultSlicer'; import ClientContextStub from './.stubs/ClientContextStub'; +import { createOperationForTest } from './.stubs/createOperationForTest'; import { Type } from 'apache-arrow'; function operationHandleStub(overrides: Partial): TOperationHandle { @@ -47,15 +48,15 @@ describe('DBSQLOperation', () => { describe('status', () => { it('should pick up state from operation handle', async () => { const context = new ClientContextStub(); - const operation = new DBSQLOperation({ handle: 
operationHandleStub({ hasResultSet: true }), context }); + const operation = createOperationForTest({ handle: operationHandleStub({ hasResultSet: true }), context }); - expect(operation['state']).to.equal(TOperationState.INITIALIZED_STATE); - expect(operation['operationHandle'].hasResultSet).to.be.true; + expect((operation['backend'] as any)['state']).to.equal(TOperationState.INITIALIZED_STATE); + expect((operation['backend'] as any)['operationHandle'].hasResultSet).to.be.true; }); it('should pick up state from directResults', async () => { const context = new ClientContextStub(); - const operation = new DBSQLOperation({ + const operation = createOperationForTest({ handle: operationHandleStub({ hasResultSet: true }), context, directResults: { @@ -67,8 +68,8 @@ describe('DBSQLOperation', () => { }, }); - expect(operation['state']).to.equal(TOperationState.FINISHED_STATE); - expect(operation['operationHandle'].hasResultSet).to.be.true; + expect((operation['backend'] as any)['state']).to.equal(TOperationState.FINISHED_STATE); + expect((operation['backend'] as any)['operationHandle'].hasResultSet).to.be.true; }); it('should fetch status and update internal state', async () => { @@ -77,17 +78,17 @@ describe('DBSQLOperation', () => { driver.getOperationStatusResp.operationState = TOperationState.FINISHED_STATE; driver.getOperationStatusResp.hasResultSet = true; - const operation = new DBSQLOperation({ handle: operationHandleStub({ hasResultSet: false }), context }); + const operation = createOperationForTest({ handle: operationHandleStub({ hasResultSet: false }), context }); - expect(operation['state']).to.equal(TOperationState.INITIALIZED_STATE); - expect(operation['operationHandle'].hasResultSet).to.be.false; + expect((operation['backend'] as any)['state']).to.equal(TOperationState.INITIALIZED_STATE); + expect((operation['backend'] as any)['operationHandle'].hasResultSet).to.be.false; const status = await operation.status(); 
expect(driver.getOperationStatus.called).to.be.true; expect(status.operationState).to.equal(TOperationState.FINISHED_STATE); - expect(operation['state']).to.equal(TOperationState.FINISHED_STATE); - expect(operation['operationHandle'].hasResultSet).to.be.true; + expect((operation['backend'] as any)['state']).to.equal(TOperationState.FINISHED_STATE); + expect((operation['backend'] as any)['operationHandle'].hasResultSet).to.be.true; }); it('should request progress', async () => { @@ -95,7 +96,7 @@ describe('DBSQLOperation', () => { const driver = sinon.spy(context.driver); driver.getOperationStatusResp.operationState = TOperationState.FINISHED_STATE; - const operation = new DBSQLOperation({ handle: operationHandleStub({ hasResultSet: false }), context }); + const operation = createOperationForTest({ handle: operationHandleStub({ hasResultSet: false }), context }); await operation.status(true); expect(driver.getOperationStatus.called).to.be.true; @@ -108,10 +109,10 @@ describe('DBSQLOperation', () => { const driver = sinon.spy(context.driver); driver.getOperationStatusResp.hasResultSet = true; - const operation = new DBSQLOperation({ handle: operationHandleStub({ hasResultSet: false }), context }); + const operation = createOperationForTest({ handle: operationHandleStub({ hasResultSet: false }), context }); - expect(operation['state']).to.equal(TOperationState.INITIALIZED_STATE); - expect(operation['operationHandle'].hasResultSet).to.be.false; + expect((operation['backend'] as any)['state']).to.equal(TOperationState.INITIALIZED_STATE); + expect((operation['backend'] as any)['operationHandle'].hasResultSet).to.be.false; // First call - should fetch data and cache driver.getOperationStatusResp = { @@ -122,8 +123,8 @@ describe('DBSQLOperation', () => { expect(driver.getOperationStatus.callCount).to.equal(1); expect(status1.operationState).to.equal(TOperationState.FINISHED_STATE); - expect(operation['state']).to.equal(TOperationState.FINISHED_STATE); - 
expect(operation['operationHandle'].hasResultSet).to.be.true; + expect((operation['backend'] as any)['state']).to.equal(TOperationState.FINISHED_STATE); + expect((operation['backend'] as any)['operationHandle'].hasResultSet).to.be.true; // Second call - should return cached data driver.getOperationStatusResp = { @@ -134,8 +135,8 @@ describe('DBSQLOperation', () => { expect(driver.getOperationStatus.callCount).to.equal(1); expect(status2.operationState).to.equal(TOperationState.FINISHED_STATE); - expect(operation['state']).to.equal(TOperationState.FINISHED_STATE); - expect(operation['operationHandle'].hasResultSet).to.be.true; + expect((operation['backend'] as any)['state']).to.equal(TOperationState.FINISHED_STATE); + expect((operation['backend'] as any)['operationHandle'].hasResultSet).to.be.true; }); it('should fetch status if directResults status is not finished', async () => { @@ -144,7 +145,7 @@ describe('DBSQLOperation', () => { driver.getOperationStatusResp.operationState = TOperationState.FINISHED_STATE; driver.getOperationStatusResp.hasResultSet = true; - const operation = new DBSQLOperation({ + const operation = createOperationForTest({ handle: operationHandleStub({ hasResultSet: false }), context, directResults: { @@ -156,15 +157,15 @@ describe('DBSQLOperation', () => { }, }); - expect(operation['state']).to.equal(TOperationState.RUNNING_STATE); // from directResults - expect(operation['operationHandle'].hasResultSet).to.be.false; + expect((operation['backend'] as any)['state']).to.equal(TOperationState.RUNNING_STATE); // from directResults + expect((operation['backend'] as any)['operationHandle'].hasResultSet).to.be.false; const status = await operation.status(false); expect(driver.getOperationStatus.called).to.be.true; expect(status.operationState).to.equal(TOperationState.FINISHED_STATE); - expect(operation['state']).to.equal(TOperationState.FINISHED_STATE); - expect(operation['operationHandle'].hasResultSet).to.be.true; + expect((operation['backend'] 
as any)['state']).to.equal(TOperationState.FINISHED_STATE); + expect((operation['backend'] as any)['operationHandle'].hasResultSet).to.be.true; }); it('should not fetch status if directResults status is finished', async () => { @@ -173,7 +174,7 @@ describe('DBSQLOperation', () => { driver.getOperationStatusResp.operationState = TOperationState.RUNNING_STATE; driver.getOperationStatusResp.hasResultSet = true; - const operation = new DBSQLOperation({ + const operation = createOperationForTest({ handle: operationHandleStub({ hasResultSet: false }), context, directResults: { @@ -185,21 +186,21 @@ describe('DBSQLOperation', () => { }, }); - expect(operation['state']).to.equal(TOperationState.FINISHED_STATE); // from directResults - expect(operation['operationHandle'].hasResultSet).to.be.false; + expect((operation['backend'] as any)['state']).to.equal(TOperationState.FINISHED_STATE); // from directResults + expect((operation['backend'] as any)['operationHandle'].hasResultSet).to.be.false; const status = await operation.status(false); expect(driver.getOperationStatus.called).to.be.false; expect(status.operationState).to.equal(TOperationState.FINISHED_STATE); - expect(operation['state']).to.equal(TOperationState.FINISHED_STATE); - expect(operation['operationHandle'].hasResultSet).to.be.false; + expect((operation['backend'] as any)['state']).to.equal(TOperationState.FINISHED_STATE); + expect((operation['backend'] as any)['operationHandle'].hasResultSet).to.be.false; }); it('should throw an error in case of a status error', async () => { const context = new ClientContextStub(); context.driver.getOperationStatusResp.status.statusCode = TStatusCode.ERROR_STATUS; - const operation = new DBSQLOperation({ handle: operationHandleStub({ hasResultSet: true }), context }); + const operation = createOperationForTest({ handle: operationHandleStub({ hasResultSet: true }), context }); try { await operation.status(false); @@ -217,7 +218,7 @@ describe('DBSQLOperation', () => { it('should 
cancel operation and update state', async () => { const context = new ClientContextStub(); const driver = sinon.spy(context.driver); - const operation = new DBSQLOperation({ handle: operationHandleStub({ hasResultSet: true }), context }); + const operation = createOperationForTest({ handle: operationHandleStub({ hasResultSet: true }), context }); expect(operation['cancelled']).to.be.false; expect(operation['closed']).to.be.false; @@ -232,7 +233,7 @@ describe('DBSQLOperation', () => { it('should return immediately if already cancelled', async () => { const context = new ClientContextStub(); const driver = sinon.spy(context.driver); - const operation = new DBSQLOperation({ handle: operationHandleStub({ hasResultSet: true }), context }); + const operation = createOperationForTest({ handle: operationHandleStub({ hasResultSet: true }), context }); expect(operation['cancelled']).to.be.false; expect(operation['closed']).to.be.false; @@ -251,7 +252,7 @@ describe('DBSQLOperation', () => { it('should return immediately if already closed', async () => { const context = new ClientContextStub(); const driver = sinon.spy(context.driver); - const operation = new DBSQLOperation({ handle: operationHandleStub({ hasResultSet: true }), context }); + const operation = createOperationForTest({ handle: operationHandleStub({ hasResultSet: true }), context }); expect(operation['cancelled']).to.be.false; expect(operation['closed']).to.be.false; @@ -270,7 +271,7 @@ describe('DBSQLOperation', () => { it('should throw an error in case of a status error and keep state', async () => { const context = new ClientContextStub(); context.driver.cancelOperationResp.status.statusCode = TStatusCode.ERROR_STATUS; - const operation = new DBSQLOperation({ handle: operationHandleStub({ hasResultSet: true }), context }); + const operation = createOperationForTest({ handle: operationHandleStub({ hasResultSet: true }), context }); expect(operation['cancelled']).to.be.false; 
expect(operation['closed']).to.be.false; @@ -290,7 +291,7 @@ describe('DBSQLOperation', () => { it('should reject all methods once cancelled', async () => { const context = new ClientContextStub(); - const operation = new DBSQLOperation({ handle: operationHandleStub({ hasResultSet: true }), context }); + const operation = createOperationForTest({ handle: operationHandleStub({ hasResultSet: true }), context }); await operation.cancel(); expect(operation['cancelled']).to.be.true; @@ -307,7 +308,7 @@ describe('DBSQLOperation', () => { it('should close operation and update state', async () => { const context = new ClientContextStub(); const driver = sinon.spy(context.driver); - const operation = new DBSQLOperation({ handle: operationHandleStub({ hasResultSet: true }), context }); + const operation = createOperationForTest({ handle: operationHandleStub({ hasResultSet: true }), context }); expect(operation['cancelled']).to.be.false; expect(operation['closed']).to.be.false; @@ -322,7 +323,7 @@ describe('DBSQLOperation', () => { it('should return immediately if already closed', async () => { const context = new ClientContextStub(); const driver = sinon.spy(context.driver); - const operation = new DBSQLOperation({ handle: operationHandleStub({ hasResultSet: true }), context }); + const operation = createOperationForTest({ handle: operationHandleStub({ hasResultSet: true }), context }); expect(operation['cancelled']).to.be.false; expect(operation['closed']).to.be.false; @@ -341,7 +342,7 @@ describe('DBSQLOperation', () => { it('should return immediately if already cancelled', async () => { const context = new ClientContextStub(); const driver = sinon.spy(context.driver); - const operation = new DBSQLOperation({ handle: operationHandleStub({ hasResultSet: true }), context }); + const operation = createOperationForTest({ handle: operationHandleStub({ hasResultSet: true }), context }); expect(operation['cancelled']).to.be.false; expect(operation['closed']).to.be.false; @@ 
-361,7 +362,7 @@ describe('DBSQLOperation', () => { const context = new ClientContextStub(); const driver = sinon.spy(context.driver); - const operation = new DBSQLOperation({ + const operation = createOperationForTest({ handle: operationHandleStub({ hasResultSet: true }), context, directResults: { @@ -385,7 +386,7 @@ describe('DBSQLOperation', () => { it('should throw an error in case of a status error and keep state', async () => { const context = new ClientContextStub(); context.driver.closeOperationResp.status.statusCode = TStatusCode.ERROR_STATUS; - const operation = new DBSQLOperation({ handle: operationHandleStub({ hasResultSet: true }), context }); + const operation = createOperationForTest({ handle: operationHandleStub({ hasResultSet: true }), context }); expect(operation['cancelled']).to.be.false; expect(operation['closed']).to.be.false; @@ -405,7 +406,7 @@ describe('DBSQLOperation', () => { it('should reject all methods once closed', async () => { const context = new ClientContextStub(); - const operation = new DBSQLOperation({ handle: operationHandleStub({ hasResultSet: true }), context }); + const operation = createOperationForTest({ handle: operationHandleStub({ hasResultSet: true }), context }); await operation.close(); expect(operation['closed']).to.be.true; @@ -437,14 +438,14 @@ describe('DBSQLOperation', () => { return getOperationStatusStub.wrappedMethod.apply(context.driver, args); }); - const operation = new DBSQLOperation({ handle: operationHandleStub({ hasResultSet: true }), context }); + const operation = createOperationForTest({ handle: operationHandleStub({ hasResultSet: true }), context }); - expect(operation['state']).to.equal(TOperationState.INITIALIZED_STATE); + expect((operation['backend'] as any)['state']).to.equal(TOperationState.INITIALIZED_STATE); await operation.finished(); expect(getOperationStatusStub.callCount).to.be.equal(attemptsUntilFinished); - expect(operation['state']).to.equal(TOperationState.FINISHED_STATE); + 
expect((operation['backend'] as any)['state']).to.equal(TOperationState.FINISHED_STATE); }); }, ); @@ -463,7 +464,7 @@ describe('DBSQLOperation', () => { return getOperationStatusStub.wrappedMethod.apply(context.driver, args); }); - const operation = new DBSQLOperation({ handle: operationHandleStub({ hasResultSet: true }), context }); + const operation = createOperationForTest({ handle: operationHandleStub({ hasResultSet: true }), context }); await operation.finished({ progress: true }); expect(getOperationStatusStub.called).to.be.true; @@ -487,7 +488,7 @@ describe('DBSQLOperation', () => { return getOperationStatusStub.wrappedMethod.apply(context.driver, args); }); - const operation = new DBSQLOperation({ handle: operationHandleStub({ hasResultSet: true }), context }); + const operation = createOperationForTest({ handle: operationHandleStub({ hasResultSet: true }), context }); const callback = sinon.stub(); @@ -503,7 +504,7 @@ describe('DBSQLOperation', () => { driver.getOperationStatusResp.status.statusCode = TStatusCode.SUCCESS_STATUS; driver.getOperationStatusResp.operationState = TOperationState.FINISHED_STATE; - const operation = new DBSQLOperation({ + const operation = createOperationForTest({ handle: operationHandleStub({ hasResultSet: true }), context, directResults: { @@ -526,7 +527,7 @@ describe('DBSQLOperation', () => { context.driver.getOperationStatusResp.status.statusCode = TStatusCode.ERROR_STATUS; context.driver.getOperationStatusResp.operationState = TOperationState.FINISHED_STATE; - const operation = new DBSQLOperation({ handle: operationHandleStub({ hasResultSet: true }), context }); + const operation = createOperationForTest({ handle: operationHandleStub({ hasResultSet: true }), context }); try { await operation.finished(); @@ -551,7 +552,7 @@ describe('DBSQLOperation', () => { context.driver.getOperationStatusResp.status.statusCode = TStatusCode.SUCCESS_STATUS; context.driver.getOperationStatusResp.operationState = operationState; - const 
operation = new DBSQLOperation({ handle: operationHandleStub({ hasResultSet: true }), context }); + const operation = createOperationForTest({ handle: operationHandleStub({ hasResultSet: true }), context }); try { await operation.finished(); @@ -573,7 +574,7 @@ describe('DBSQLOperation', () => { context.driver.getOperationStatusResp.operationState = TOperationState.FINISHED_STATE; context.driver.getOperationStatusResp.hasResultSet = false; - const operation = new DBSQLOperation({ handle: operationHandleStub({ hasResultSet: false }), context }); + const operation = createOperationForTest({ handle: operationHandleStub({ hasResultSet: false }), context }); const schema = await operation.getSchema(); @@ -597,13 +598,13 @@ describe('DBSQLOperation', () => { context.driver.getResultSetMetadataResp.schema = { columns: [] }; - const operation = new DBSQLOperation({ handle: operationHandleStub({ hasResultSet: true }), context }); + const operation = createOperationForTest({ handle: operationHandleStub({ hasResultSet: true }), context }); const schema = await operation.getSchema(); expect(getOperationStatusStub.called).to.be.true; expect(schema).to.deep.equal(context.driver.getResultSetMetadataResp.schema); - expect(operation['state']).to.equal(TOperationState.FINISHED_STATE); + expect((operation['backend'] as any)['state']).to.equal(TOperationState.FINISHED_STATE); }); it('should request progress', async () => { @@ -620,7 +621,7 @@ describe('DBSQLOperation', () => { return getOperationStatusStub.wrappedMethod.apply(context.driver, args); }); - const operation = new DBSQLOperation({ handle: operationHandleStub({ hasResultSet: true }), context }); + const operation = createOperationForTest({ handle: operationHandleStub({ hasResultSet: true }), context }); await operation.getSchema({ progress: true }); expect(getOperationStatusStub.called).to.be.true; @@ -644,7 +645,7 @@ describe('DBSQLOperation', () => { return getOperationStatusStub.wrappedMethod.apply(context.driver, args); 
}); - const operation = new DBSQLOperation({ handle: operationHandleStub({ hasResultSet: true }), context }); + const operation = createOperationForTest({ handle: operationHandleStub({ hasResultSet: true }), context }); const callback = sinon.stub(); @@ -660,7 +661,7 @@ describe('DBSQLOperation', () => { driver.getOperationStatusResp.operationState = TOperationState.FINISHED_STATE; driver.getOperationStatusResp.hasResultSet = true; - const operation = new DBSQLOperation({ handle: operationHandleStub({ hasResultSet: true }), context }); + const operation = createOperationForTest({ handle: operationHandleStub({ hasResultSet: true }), context }); const schema = await operation.getSchema(); expect(schema).to.deep.equal(driver.getResultSetMetadataResp.schema); @@ -673,7 +674,7 @@ describe('DBSQLOperation', () => { driver.getOperationStatusResp.operationState = TOperationState.FINISHED_STATE; driver.getOperationStatusResp.hasResultSet = true; - const operation = new DBSQLOperation({ handle: operationHandleStub({ hasResultSet: true }), context }); + const operation = createOperationForTest({ handle: operationHandleStub({ hasResultSet: true }), context }); const schema1 = await operation.getSchema(); expect(schema1).to.deep.equal(context.driver.getResultSetMetadataResp.schema); @@ -710,7 +711,7 @@ describe('DBSQLOperation', () => { }, }, }; - const operation = new DBSQLOperation({ + const operation = createOperationForTest({ handle: operationHandleStub({ hasResultSet: true }), context, directResults, @@ -728,7 +729,7 @@ describe('DBSQLOperation', () => { context.driver.getOperationStatusResp.hasResultSet = true; context.driver.getResultSetMetadataResp.status.statusCode = TStatusCode.ERROR_STATUS; - const operation = new DBSQLOperation({ handle: operationHandleStub({ hasResultSet: true }), context }); + const operation = createOperationForTest({ handle: operationHandleStub({ hasResultSet: true }), context }); try { await operation.getSchema(); @@ -751,8 +752,8 @@ 
describe('DBSQLOperation', () => { driver.getResultSetMetadataResp.resultFormat = TSparkRowSetType.COLUMN_BASED_SET; driver.getResultSetMetadata.resetHistory(); - const operation = new DBSQLOperation({ handle: operationHandleStub({ hasResultSet: true }), context }); - const resultHandler = await operation['getResultHandler'](); + const operation = createOperationForTest({ handle: operationHandleStub({ hasResultSet: true }), context }); + const resultHandler = await (operation['backend'] as any)['getResultHandler'](); expect(driver.getResultSetMetadata.called).to.be.true; expect(resultHandler).to.be.instanceOf(ResultSlicer); expect(resultHandler['source']).to.be.instanceOf(JsonResultHandler); @@ -762,8 +763,8 @@ describe('DBSQLOperation', () => { driver.getResultSetMetadataResp.resultFormat = TSparkRowSetType.ARROW_BASED_SET; driver.getResultSetMetadata.resetHistory(); - const operation = new DBSQLOperation({ handle: operationHandleStub({ hasResultSet: true }), context }); - const resultHandler = await operation['getResultHandler'](); + const operation = createOperationForTest({ handle: operationHandleStub({ hasResultSet: true }), context }); + const resultHandler = await (operation['backend'] as any)['getResultHandler'](); expect(driver.getResultSetMetadata.called).to.be.true; expect(resultHandler).to.be.instanceOf(ResultSlicer); expect(resultHandler['source']).to.be.instanceOf(ArrowResultConverter); @@ -777,8 +778,8 @@ describe('DBSQLOperation', () => { driver.getResultSetMetadataResp.resultFormat = TSparkRowSetType.URL_BASED_SET; driver.getResultSetMetadata.resetHistory(); - const operation = new DBSQLOperation({ handle: operationHandleStub({ hasResultSet: true }), context }); - const resultHandler = await operation['getResultHandler'](); + const operation = createOperationForTest({ handle: operationHandleStub({ hasResultSet: true }), context }); + const resultHandler = await (operation['backend'] as any)['getResultHandler'](); 
expect(driver.getResultSetMetadata.called).to.be.true; expect(resultHandler).to.be.instanceOf(ResultSlicer); expect(resultHandler['source']).to.be.instanceOf(ArrowResultConverter); @@ -795,7 +796,7 @@ describe('DBSQLOperation', () => { const context = new ClientContextStub(); const driver = sinon.spy(context.driver); - const operation = new DBSQLOperation({ handle: operationHandleStub({ hasResultSet: false }), context }); + const operation = createOperationForTest({ handle: operationHandleStub({ hasResultSet: false }), context }); const results = await operation.fetchChunk({ disableBuffering: true }); @@ -822,13 +823,13 @@ describe('DBSQLOperation', () => { context.driver.fetchResultsResp.hasMoreRows = false; context.driver.fetchResultsResp.results!.columns = []; - const operation = new DBSQLOperation({ handle: operationHandleStub({ hasResultSet: true }), context }); + const operation = createOperationForTest({ handle: operationHandleStub({ hasResultSet: true }), context }); const results = await operation.fetchChunk({ disableBuffering: true }); expect(getOperationStatusStub.called).to.be.true; expect(results).to.deep.equal([]); - expect(operation['state']).to.equal(TOperationState.FINISHED_STATE); + expect((operation['backend'] as any)['state']).to.equal(TOperationState.FINISHED_STATE); }); it('should request progress', async () => { @@ -849,7 +850,7 @@ describe('DBSQLOperation', () => { context.driver.fetchResultsResp.hasMoreRows = false; context.driver.fetchResultsResp.results!.columns = []; - const operation = new DBSQLOperation({ handle: operationHandleStub({ hasResultSet: true }), context }); + const operation = createOperationForTest({ handle: operationHandleStub({ hasResultSet: true }), context }); await operation.fetchChunk({ progress: true, disableBuffering: true }); expect(getOperationStatusStub.called).to.be.true; @@ -877,7 +878,7 @@ describe('DBSQLOperation', () => { context.driver.fetchResultsResp.hasMoreRows = false; 
context.driver.fetchResultsResp.results!.columns = []; - const operation = new DBSQLOperation({ handle: operationHandleStub({ hasResultSet: true }), context }); + const operation = createOperationForTest({ handle: operationHandleStub({ hasResultSet: true }), context }); const callback = sinon.stub(); @@ -893,7 +894,7 @@ describe('DBSQLOperation', () => { driver.getOperationStatusResp.operationState = TOperationState.FINISHED_STATE; driver.getOperationStatusResp.hasResultSet = true; - const operation = new DBSQLOperation({ handle: operationHandleStub({ hasResultSet: true }), context }); + const operation = createOperationForTest({ handle: operationHandleStub({ hasResultSet: true }), context }); const results = await operation.fetchChunk({ disableBuffering: true }); @@ -907,7 +908,7 @@ describe('DBSQLOperation', () => { const driver = sinon.spy(context.driver); driver.getOperationStatusResp.operationState = TOperationState.FINISHED_STATE; - const operation = new DBSQLOperation({ + const operation = createOperationForTest({ handle: operationHandleStub({ hasResultSet: true }), context, directResults: { @@ -943,7 +944,7 @@ describe('DBSQLOperation', () => { driver.getOperationStatusResp.operationState = TOperationState.FINISHED_STATE; driver.getOperationStatusResp.hasResultSet = true; - const operation = new DBSQLOperation({ + const operation = createOperationForTest({ handle: operationHandleStub({ hasResultSet: true }), context, directResults: { @@ -986,7 +987,7 @@ describe('DBSQLOperation', () => { context.driver.getResultSetMetadataResp.resultFormat = TSparkRowSetType.ROW_BASED_SET; context.driver.getResultSetMetadataResp.schema = { columns: [] }; - const operation = new DBSQLOperation({ handle: operationHandleStub({ hasResultSet: true }), context }); + const operation = createOperationForTest({ handle: operationHandleStub({ hasResultSet: true }), context }); try { await operation.fetchChunk({ disableBuffering: true }); @@ -1003,7 +1004,7 @@ 
describe('DBSQLOperation', () => { describe('fetchAll', () => { it('should fetch data while available and return it all', async () => { const context = new ClientContextStub(); - const operation = new DBSQLOperation({ handle: operationHandleStub({ hasResultSet: true }), context }); + const operation = createOperationForTest({ handle: operationHandleStub({ hasResultSet: true }), context }); const originalData = [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]; @@ -1038,13 +1039,13 @@ describe('DBSQLOperation', () => { context.driver.getOperationStatusResp.hasResultSet = true; context.driver.fetchResultsResp.hasMoreRows = false; context.driver.fetchResultsResp.results = undefined; - const operation = new DBSQLOperation({ handle: operationHandleStub({ hasResultSet: true }), context }); + const operation = createOperationForTest({ handle: operationHandleStub({ hasResultSet: true }), context }); expect(await operation.hasMoreRows()).to.be.true; - expect(operation['_data']['hasMoreRowsFlag']).to.be.undefined; + expect((operation['backend'] as any)['_data']['hasMoreRowsFlag']).to.be.undefined; await operation.fetchChunk({ disableBuffering: true }); expect(await operation.hasMoreRows()).to.be.false; - expect(operation['_data']['hasMoreRowsFlag']).to.be.false; + expect((operation['backend'] as any)['_data']['hasMoreRowsFlag']).to.be.false; }); it('should return False if operation was closed', async () => { @@ -1053,7 +1054,7 @@ describe('DBSQLOperation', () => { context.driver.getOperationStatusResp.operationState = TOperationState.FINISHED_STATE; context.driver.getOperationStatusResp.hasResultSet = true; context.driver.fetchResultsResp.hasMoreRows = true; - const operation = new DBSQLOperation({ handle: operationHandleStub({ hasResultSet: true }), context }); + const operation = createOperationForTest({ handle: operationHandleStub({ hasResultSet: true }), context }); expect(await operation.hasMoreRows()).to.be.true; await operation.fetchChunk({ disableBuffering: true }); @@ -1068,7 +1069,7 
@@ describe('DBSQLOperation', () => { context.driver.getOperationStatusResp.operationState = TOperationState.FINISHED_STATE; context.driver.getOperationStatusResp.hasResultSet = true; context.driver.fetchResultsResp.hasMoreRows = true; - const operation = new DBSQLOperation({ handle: operationHandleStub({ hasResultSet: true }), context }); + const operation = createOperationForTest({ handle: operationHandleStub({ hasResultSet: true }), context }); expect(await operation.hasMoreRows()).to.be.true; await operation.fetchChunk({ disableBuffering: true }); @@ -1083,13 +1084,13 @@ describe('DBSQLOperation', () => { context.driver.getOperationStatusResp.operationState = TOperationState.FINISHED_STATE; context.driver.getOperationStatusResp.hasResultSet = true; context.driver.fetchResultsResp.hasMoreRows = true; - const operation = new DBSQLOperation({ handle: operationHandleStub({ hasResultSet: true }), context }); + const operation = createOperationForTest({ handle: operationHandleStub({ hasResultSet: true }), context }); expect(await operation.hasMoreRows()).to.be.true; - expect(operation['_data']['hasMoreRowsFlag']).to.be.undefined; + expect((operation['backend'] as any)['_data']['hasMoreRowsFlag']).to.be.undefined; await operation.fetchChunk({ disableBuffering: true }); expect(await operation.hasMoreRows()).to.be.true; - expect(operation['_data']['hasMoreRowsFlag']).to.be.true; + expect((operation['backend'] as any)['_data']['hasMoreRowsFlag']).to.be.true; }); it('should return True if hasMoreRows flag is False but there is actual data', async () => { @@ -1098,13 +1099,13 @@ describe('DBSQLOperation', () => { context.driver.getOperationStatusResp.operationState = TOperationState.FINISHED_STATE; context.driver.getOperationStatusResp.hasResultSet = true; context.driver.fetchResultsResp.hasMoreRows = false; - const operation = new DBSQLOperation({ handle: operationHandleStub({ hasResultSet: true }), context }); + const operation = createOperationForTest({ handle: 
operationHandleStub({ hasResultSet: true }), context }); expect(await operation.hasMoreRows()).to.be.true; - expect(operation['_data']['hasMoreRowsFlag']).to.be.undefined; + expect((operation['backend'] as any)['_data']['hasMoreRowsFlag']).to.be.undefined; await operation.fetchChunk({ disableBuffering: true }); expect(await operation.hasMoreRows()).to.be.true; - expect(operation['_data']['hasMoreRowsFlag']).to.be.true; + expect((operation['backend'] as any)['_data']['hasMoreRowsFlag']).to.be.true; }); it('should return True if hasMoreRows flag is unset but there is actual data', async () => { @@ -1113,13 +1114,13 @@ describe('DBSQLOperation', () => { context.driver.getOperationStatusResp.operationState = TOperationState.FINISHED_STATE; context.driver.getOperationStatusResp.hasResultSet = true; context.driver.fetchResultsResp.hasMoreRows = undefined; - const operation = new DBSQLOperation({ handle: operationHandleStub({ hasResultSet: true }), context }); + const operation = createOperationForTest({ handle: operationHandleStub({ hasResultSet: true }), context }); expect(await operation.hasMoreRows()).to.be.true; - expect(operation['_data']['hasMoreRowsFlag']).to.be.undefined; + expect((operation['backend'] as any)['_data']['hasMoreRowsFlag']).to.be.undefined; await operation.fetchChunk({ disableBuffering: true }); expect(await operation.hasMoreRows()).to.be.true; - expect(operation['_data']['hasMoreRowsFlag']).to.be.true; + expect((operation['backend'] as any)['_data']['hasMoreRowsFlag']).to.be.true; }); it('should return False if hasMoreRows flag is False and there is no data', async () => { @@ -1129,13 +1130,13 @@ describe('DBSQLOperation', () => { context.driver.getOperationStatusResp.hasResultSet = true; context.driver.fetchResultsResp.hasMoreRows = false; context.driver.fetchResultsResp.results = undefined; - const operation = new DBSQLOperation({ handle: operationHandleStub({ hasResultSet: true }), context }); + const operation = createOperationForTest({ 
handle: operationHandleStub({ hasResultSet: true }), context }); expect(await operation.hasMoreRows()).to.be.true; - expect(operation['_data']['hasMoreRowsFlag']).to.be.undefined; + expect((operation['backend'] as any)['_data']['hasMoreRowsFlag']).to.be.undefined; await operation.fetchChunk({ disableBuffering: true }); expect(await operation.hasMoreRows()).to.be.false; - expect(operation['_data']['hasMoreRowsFlag']).to.be.false; + expect((operation['backend'] as any)['_data']['hasMoreRowsFlag']).to.be.false; }); }); @@ -1147,7 +1148,7 @@ describe('DBSQLOperation', () => { driver.getOperationStatusResp.hasResultSet = true; // Create operation without direct results to force metadata fetching - const operation = new DBSQLOperation({ handle: operationHandleStub({ hasResultSet: true }), context }); + const operation = createOperationForTest({ handle: operationHandleStub({ hasResultSet: true }), context }); // Trigger multiple concurrent metadata fetches const results = await Promise.all([operation.hasMoreRows(), operation.hasMoreRows(), operation.hasMoreRows()]); @@ -1165,7 +1166,7 @@ describe('DBSQLOperation', () => { driver.getOperationStatusResp.operationState = TOperationState.FINISHED_STATE; driver.getOperationStatusResp.hasResultSet = true; - const operation = new DBSQLOperation({ handle: operationHandleStub({ hasResultSet: true }), context }); + const operation = createOperationForTest({ handle: operationHandleStub({ hasResultSet: true }), context }); // First call should fetch metadata await operation.hasMoreRows(); diff --git a/tests/unit/DBSQLSession.test.ts b/tests/unit/DBSQLSession.test.ts index 0dc79037..51b27133 100644 --- a/tests/unit/DBSQLSession.test.ts +++ b/tests/unit/DBSQLSession.test.ts @@ -7,6 +7,7 @@ import Status from '../../lib/dto/Status'; import DBSQLOperation from '../../lib/DBSQLOperation'; import { TSessionHandle, TProtocolVersion } from '../../thrift/TCLIService_types'; import ClientContextStub from './.stubs/ClientContextStub'; +import { 
createSessionForTest } from './.stubs/createSessionForTest'; const sessionHandleStub: TSessionHandle = { sessionId: { guid: Buffer.alloc(16), secret: Buffer.alloc(16) }, @@ -50,7 +51,7 @@ describe('DBSQLSession', () => { describe('getInfo', () => { it('should run operation', async () => { - const session = new DBSQLSession({ handle: sessionHandleStub, context: new ClientContextStub() }); + const session = createSessionForTest({ handle: sessionHandleStub, context: new ClientContextStub() }); const result = await session.getInfo(1); expect(result).instanceOf(InfoValue); }); @@ -58,26 +59,26 @@ describe('DBSQLSession', () => { describe('executeStatement', () => { it('should execute statement', async () => { - const session = new DBSQLSession({ handle: sessionHandleStub, context: new ClientContextStub() }); + const session = createSessionForTest({ handle: sessionHandleStub, context: new ClientContextStub() }); const result = await session.executeStatement('SELECT * FROM table'); expect(result).instanceOf(DBSQLOperation); }); it('should use direct results', async () => { - const session = new DBSQLSession({ handle: sessionHandleStub, context: new ClientContextStub() }); + const session = createSessionForTest({ handle: sessionHandleStub, context: new ClientContextStub() }); const result = await session.executeStatement('SELECT * FROM table', { maxRows: 10 }); expect(result).instanceOf(DBSQLOperation); }); it('should disable direct results', async () => { - const session = new DBSQLSession({ handle: sessionHandleStub, context: new ClientContextStub() }); + const session = createSessionForTest({ handle: sessionHandleStub, context: new ClientContextStub() }); const result = await session.executeStatement('SELECT * FROM table', { maxRows: null }); expect(result).instanceOf(DBSQLOperation); }); describe('Arrow support', () => { it('should not use Arrow if disabled in options', async () => { - const session = new DBSQLSession({ + const session = createSessionForTest({ handle: 
sessionHandleStub, context: new ClientContextStub({ arrowEnabled: false }), }); @@ -88,7 +89,7 @@ describe('DBSQLSession', () => { it('should apply defaults for Arrow options', async () => { // case 1 { - const session = new DBSQLSession({ + const session = createSessionForTest({ handle: sessionHandleStub, context: new ClientContextStub({ arrowEnabled: true }), }); @@ -98,7 +99,7 @@ describe('DBSQLSession', () => { // case 2 { - const session = new DBSQLSession({ + const session = createSessionForTest({ handle: sessionHandleStub, context: new ClientContextStub({ arrowEnabled: true, useArrowNativeTypes: false }), }); @@ -133,7 +134,7 @@ describe('DBSQLSession', () => { useLZ4Compression: true, }; - const session = new DBSQLSession({ + const session = createSessionForTest({ handle: sessionHandleStub, context, serverProtocolVersion: version, @@ -195,7 +196,7 @@ describe('DBSQLSession', () => { const statement = 'SELECT * FROM table'; // Use V6+ which supports arrow compression - const session = new DBSQLSession({ + const session = createSessionForTest({ handle: sessionHandleStub, context, serverProtocolVersion: TProtocolVersion.SPARK_CLI_SERVICE_PROTOCOL_V6, @@ -218,7 +219,7 @@ describe('DBSQLSession', () => { const statement = 'SELECT * FROM table'; // Use V6+ which supports arrow compression - const session = new DBSQLSession({ + const session = createSessionForTest({ handle: sessionHandleStub, context, serverProtocolVersion: TProtocolVersion.SPARK_CLI_SERVICE_PROTOCOL_V6, @@ -241,7 +242,7 @@ describe('DBSQLSession', () => { const statement = 'SELECT * FROM table'; // Use V5 which does not support arrow compression - const session = new DBSQLSession({ + const session = createSessionForTest({ handle: sessionHandleStub, context, serverProtocolVersion: TProtocolVersion.SPARK_CLI_SERVICE_PROTOCOL_V5, @@ -263,7 +264,7 @@ describe('DBSQLSession', () => { it('should set confOverlay with query_tags when queryTags are provided', async () => { const context = new 
ClientContextStub(); const driver = sinon.spy(context.driver); - const session = new DBSQLSession({ handle: sessionHandleStub, context }); + const session = createSessionForTest({ handle: sessionHandleStub, context }); await session.executeStatement('SELECT 1', { queryTags: { team: 'eng', app: 'etl' } }); @@ -275,7 +276,7 @@ describe('DBSQLSession', () => { it('should not set confOverlay query_tags when queryTags is not provided', async () => { const context = new ClientContextStub(); const driver = sinon.spy(context.driver); - const session = new DBSQLSession({ handle: sessionHandleStub, context }); + const session = createSessionForTest({ handle: sessionHandleStub, context }); await session.executeStatement('SELECT 1'); @@ -287,7 +288,7 @@ describe('DBSQLSession', () => { it('should not set confOverlay query_tags when queryTags is empty', async () => { const context = new ClientContextStub(); const driver = sinon.spy(context.driver); - const session = new DBSQLSession({ handle: sessionHandleStub, context }); + const session = createSessionForTest({ handle: sessionHandleStub, context }); await session.executeStatement('SELECT 1', { queryTags: {} }); @@ -299,19 +300,19 @@ describe('DBSQLSession', () => { describe('getTypeInfo', () => { it('should run operation', async () => { - const session = new DBSQLSession({ handle: sessionHandleStub, context: new ClientContextStub() }); + const session = createSessionForTest({ handle: sessionHandleStub, context: new ClientContextStub() }); const result = await session.getTypeInfo(); expect(result).instanceOf(DBSQLOperation); }); it('should use direct results', async () => { - const session = new DBSQLSession({ handle: sessionHandleStub, context: new ClientContextStub() }); + const session = createSessionForTest({ handle: sessionHandleStub, context: new ClientContextStub() }); const result = await session.getTypeInfo({ maxRows: 10 }); expect(result).instanceOf(DBSQLOperation); }); it('should disable direct results', async () => 
{ - const session = new DBSQLSession({ handle: sessionHandleStub, context: new ClientContextStub() }); + const session = createSessionForTest({ handle: sessionHandleStub, context: new ClientContextStub() }); const result = await session.getTypeInfo({ maxRows: null }); expect(result).instanceOf(DBSQLOperation); }); @@ -319,19 +320,19 @@ describe('DBSQLSession', () => { describe('getCatalogs', () => { it('should run operation', async () => { - const session = new DBSQLSession({ handle: sessionHandleStub, context: new ClientContextStub() }); + const session = createSessionForTest({ handle: sessionHandleStub, context: new ClientContextStub() }); const result = await session.getCatalogs(); expect(result).instanceOf(DBSQLOperation); }); it('should use direct results', async () => { - const session = new DBSQLSession({ handle: sessionHandleStub, context: new ClientContextStub() }); + const session = createSessionForTest({ handle: sessionHandleStub, context: new ClientContextStub() }); const result = await session.getCatalogs({ maxRows: 10 }); expect(result).instanceOf(DBSQLOperation); }); it('should disable direct results', async () => { - const session = new DBSQLSession({ handle: sessionHandleStub, context: new ClientContextStub() }); + const session = createSessionForTest({ handle: sessionHandleStub, context: new ClientContextStub() }); const result = await session.getCatalogs({ maxRows: null }); expect(result).instanceOf(DBSQLOperation); }); @@ -339,13 +340,13 @@ describe('DBSQLSession', () => { describe('getSchemas', () => { it('should run operation', async () => { - const session = new DBSQLSession({ handle: sessionHandleStub, context: new ClientContextStub() }); + const session = createSessionForTest({ handle: sessionHandleStub, context: new ClientContextStub() }); const result = await session.getSchemas(); expect(result).instanceOf(DBSQLOperation); }); it('should use filters', async () => { - const session = new DBSQLSession({ handle: sessionHandleStub, context: 
new ClientContextStub() }); + const session = createSessionForTest({ handle: sessionHandleStub, context: new ClientContextStub() }); const result = await session.getSchemas({ catalogName: 'catalog', schemaName: 'schema', @@ -354,13 +355,13 @@ describe('DBSQLSession', () => { }); it('should use direct results', async () => { - const session = new DBSQLSession({ handle: sessionHandleStub, context: new ClientContextStub() }); + const session = createSessionForTest({ handle: sessionHandleStub, context: new ClientContextStub() }); const result = await session.getSchemas({ maxRows: 10 }); expect(result).instanceOf(DBSQLOperation); }); it('should disable direct results', async () => { - const session = new DBSQLSession({ handle: sessionHandleStub, context: new ClientContextStub() }); + const session = createSessionForTest({ handle: sessionHandleStub, context: new ClientContextStub() }); const result = await session.getSchemas({ maxRows: null }); expect(result).instanceOf(DBSQLOperation); }); @@ -368,13 +369,13 @@ describe('DBSQLSession', () => { describe('getTables', () => { it('should run operation', async () => { - const session = new DBSQLSession({ handle: sessionHandleStub, context: new ClientContextStub() }); + const session = createSessionForTest({ handle: sessionHandleStub, context: new ClientContextStub() }); const result = await session.getTables(); expect(result).instanceOf(DBSQLOperation); }); it('should use filters', async () => { - const session = new DBSQLSession({ handle: sessionHandleStub, context: new ClientContextStub() }); + const session = createSessionForTest({ handle: sessionHandleStub, context: new ClientContextStub() }); const result = await session.getTables({ catalogName: 'catalog', schemaName: 'default', @@ -385,13 +386,13 @@ describe('DBSQLSession', () => { }); it('should use direct results', async () => { - const session = new DBSQLSession({ handle: sessionHandleStub, context: new ClientContextStub() }); + const session = 
createSessionForTest({ handle: sessionHandleStub, context: new ClientContextStub() }); const result = await session.getTables({ maxRows: 10 }); expect(result).instanceOf(DBSQLOperation); }); it('should disable direct results', async () => { - const session = new DBSQLSession({ handle: sessionHandleStub, context: new ClientContextStub() }); + const session = createSessionForTest({ handle: sessionHandleStub, context: new ClientContextStub() }); const result = await session.getTables({ maxRows: null }); expect(result).instanceOf(DBSQLOperation); }); @@ -399,19 +400,19 @@ describe('DBSQLSession', () => { describe('getTableTypes', () => { it('should run operation', async () => { - const session = new DBSQLSession({ handle: sessionHandleStub, context: new ClientContextStub() }); + const session = createSessionForTest({ handle: sessionHandleStub, context: new ClientContextStub() }); const result = await session.getTableTypes(); expect(result).instanceOf(DBSQLOperation); }); it('should use direct results', async () => { - const session = new DBSQLSession({ handle: sessionHandleStub, context: new ClientContextStub() }); + const session = createSessionForTest({ handle: sessionHandleStub, context: new ClientContextStub() }); const result = await session.getTableTypes({ maxRows: 10 }); expect(result).instanceOf(DBSQLOperation); }); it('should disable direct results', async () => { - const session = new DBSQLSession({ handle: sessionHandleStub, context: new ClientContextStub() }); + const session = createSessionForTest({ handle: sessionHandleStub, context: new ClientContextStub() }); const result = await session.getTableTypes({ maxRows: null }); expect(result).instanceOf(DBSQLOperation); }); @@ -419,13 +420,13 @@ describe('DBSQLSession', () => { describe('getColumns', () => { it('should run operation', async () => { - const session = new DBSQLSession({ handle: sessionHandleStub, context: new ClientContextStub() }); + const session = createSessionForTest({ handle: 
sessionHandleStub, context: new ClientContextStub() }); const result = await session.getColumns(); expect(result).instanceOf(DBSQLOperation); }); it('should use filters', async () => { - const session = new DBSQLSession({ handle: sessionHandleStub, context: new ClientContextStub() }); + const session = createSessionForTest({ handle: sessionHandleStub, context: new ClientContextStub() }); const result = await session.getColumns({ catalogName: 'catalog', schemaName: 'schema', @@ -436,13 +437,13 @@ describe('DBSQLSession', () => { }); it('should use direct results', async () => { - const session = new DBSQLSession({ handle: sessionHandleStub, context: new ClientContextStub() }); + const session = createSessionForTest({ handle: sessionHandleStub, context: new ClientContextStub() }); const result = await session.getColumns({ maxRows: 10 }); expect(result).instanceOf(DBSQLOperation); }); it('should disable direct results', async () => { - const session = new DBSQLSession({ handle: sessionHandleStub, context: new ClientContextStub() }); + const session = createSessionForTest({ handle: sessionHandleStub, context: new ClientContextStub() }); const result = await session.getColumns({ maxRows: null }); expect(result).instanceOf(DBSQLOperation); }); @@ -450,7 +451,7 @@ describe('DBSQLSession', () => { describe('getFunctions', () => { it('should run operation', async () => { - const session = new DBSQLSession({ handle: sessionHandleStub, context: new ClientContextStub() }); + const session = createSessionForTest({ handle: sessionHandleStub, context: new ClientContextStub() }); const result = await session.getFunctions({ catalogName: 'catalog', schemaName: 'schema', @@ -460,7 +461,7 @@ describe('DBSQLSession', () => { }); it('should use direct results', async () => { - const session = new DBSQLSession({ handle: sessionHandleStub, context: new ClientContextStub() }); + const session = createSessionForTest({ handle: sessionHandleStub, context: new ClientContextStub() }); const 
result = await session.getFunctions({ catalogName: 'catalog', schemaName: 'schema', @@ -471,7 +472,7 @@ describe('DBSQLSession', () => { }); it('should disable direct results', async () => { - const session = new DBSQLSession({ handle: sessionHandleStub, context: new ClientContextStub() }); + const session = createSessionForTest({ handle: sessionHandleStub, context: new ClientContextStub() }); const result = await session.getFunctions({ catalogName: 'catalog', schemaName: 'schema', @@ -484,7 +485,7 @@ describe('DBSQLSession', () => { describe('getPrimaryKeys', () => { it('should run operation', async () => { - const session = new DBSQLSession({ handle: sessionHandleStub, context: new ClientContextStub() }); + const session = createSessionForTest({ handle: sessionHandleStub, context: new ClientContextStub() }); const result = await session.getPrimaryKeys({ catalogName: 'catalog', schemaName: 'schema', @@ -494,7 +495,7 @@ describe('DBSQLSession', () => { }); it('should use direct results', async () => { - const session = new DBSQLSession({ handle: sessionHandleStub, context: new ClientContextStub() }); + const session = createSessionForTest({ handle: sessionHandleStub, context: new ClientContextStub() }); const result = await session.getPrimaryKeys({ catalogName: 'catalog', schemaName: 'schema', @@ -505,7 +506,7 @@ describe('DBSQLSession', () => { }); it('should disable direct results', async () => { - const session = new DBSQLSession({ handle: sessionHandleStub, context: new ClientContextStub() }); + const session = createSessionForTest({ handle: sessionHandleStub, context: new ClientContextStub() }); const result = await session.getPrimaryKeys({ catalogName: 'catalog', schemaName: 'schema', @@ -518,7 +519,7 @@ describe('DBSQLSession', () => { describe('getCrossReference', () => { it('should run operation', async () => { - const session = new DBSQLSession({ handle: sessionHandleStub, context: new ClientContextStub() }); + const session = createSessionForTest({ 
handle: sessionHandleStub, context: new ClientContextStub() }); const result = await session.getCrossReference({ parentCatalogName: 'parentCatalogName', parentSchemaName: 'parentSchemaName', @@ -531,7 +532,7 @@ describe('DBSQLSession', () => { }); it('should use direct results', async () => { - const session = new DBSQLSession({ handle: sessionHandleStub, context: new ClientContextStub() }); + const session = createSessionForTest({ handle: sessionHandleStub, context: new ClientContextStub() }); const result = await session.getCrossReference({ parentCatalogName: 'parentCatalogName', parentSchemaName: 'parentSchemaName', @@ -545,7 +546,7 @@ describe('DBSQLSession', () => { }); it('should disable direct results', async () => { - const session = new DBSQLSession({ handle: sessionHandleStub, context: new ClientContextStub() }); + const session = createSessionForTest({ handle: sessionHandleStub, context: new ClientContextStub() }); const result = await session.getCrossReference({ parentCatalogName: 'parentCatalogName', parentSchemaName: 'parentSchemaName', @@ -564,7 +565,7 @@ describe('DBSQLSession', () => { const context = new ClientContextStub(); const driver = sinon.spy(context.driver); - const session = new DBSQLSession({ handle: sessionHandleStub, context }); + const session = createSessionForTest({ handle: sessionHandleStub, context }); expect(session['isOpen']).to.be.true; const result = await session.close(); @@ -577,7 +578,7 @@ describe('DBSQLSession', () => { const context = new ClientContextStub(); const driver = sinon.spy(context.driver); - const session = new DBSQLSession({ handle: sessionHandleStub, context }); + const session = createSessionForTest({ handle: sessionHandleStub, context }); expect(session['isOpen']).to.be.true; const result = await session.close(); @@ -592,7 +593,7 @@ describe('DBSQLSession', () => { }); it('should close operations that belong to it', async () => { - const session = new DBSQLSession({ handle: sessionHandleStub, context: new 
ClientContextStub() }); + const session = createSessionForTest({ handle: sessionHandleStub, context: new ClientContextStub() }); const operation = await session.executeStatement('SELECT * FROM table'); if (!(operation instanceof DBSQLOperation)) { expect.fail('Assertion error: operation is not a DBSQLOperation'); @@ -614,7 +615,7 @@ describe('DBSQLSession', () => { }); it('should reject all methods once closed', async () => { - const session = new DBSQLSession({ handle: sessionHandleStub, context: new ClientContextStub() }); + const session = createSessionForTest({ handle: sessionHandleStub, context: new ClientContextStub() }); await session.close(); expect(session['isOpen']).to.be.false; diff --git a/tests/unit/sea/SeaBackend.test.ts b/tests/unit/sea/SeaBackend.test.ts new file mode 100644 index 00000000..ff9e45c9 --- /dev/null +++ b/tests/unit/sea/SeaBackend.test.ts @@ -0,0 +1,39 @@ +import { expect, AssertionError } from 'chai'; +import SeaBackend from '../../../lib/sea/SeaBackend'; +import HiveDriverError from '../../../lib/errors/HiveDriverError'; +import { ConnectionOptions, OpenSessionRequest } from '../../../lib/contracts/IDBSQLClient'; + +describe('SeaBackend stub', () => { + it('connect() rejects with HiveDriverError until M1 wires the binding', async () => { + const backend = new SeaBackend(); + try { + await backend.connect({ host: '', path: '', token: '' } as ConnectionOptions); + expect.fail('It should throw an error'); + } catch (error) { + if (error instanceof AssertionError || !(error instanceof Error)) { + throw error; + } + expect(error).to.be.instanceOf(HiveDriverError); + expect(error.message).to.contain('not implemented'); + } + }); + + it('openSession() rejects with HiveDriverError until M1 wires the binding', async () => { + const backend = new SeaBackend(); + try { + await backend.openSession({} as OpenSessionRequest); + expect.fail('It should throw an error'); + } catch (error) { + if (error instanceof AssertionError || !(error instanceof 
Error)) { + throw error; + } + expect(error).to.be.instanceOf(HiveDriverError); + expect(error.message).to.contain('not implemented'); + } + }); + + it('close() is a no-op so DBSQLClient.close() can finish state-clearing after a failed connect', async () => { + const backend = new SeaBackend(); + await backend.close(); + }); +}); diff --git a/tests/unit/sea/error-mapping.test.ts b/tests/unit/sea/error-mapping.test.ts new file mode 100644 index 00000000..8331bc57 --- /dev/null +++ b/tests/unit/sea/error-mapping.test.ts @@ -0,0 +1,227 @@ +import { expect } from 'chai'; +import { + mapKernelErrorToJsError, + KernelErrorCode, + KernelErrorShape, +} from '../../../lib/sea/SeaErrorMapping'; +import HiveDriverError from '../../../lib/errors/HiveDriverError'; +import AuthenticationError from '../../../lib/errors/AuthenticationError'; +import OperationStateError, { + OperationStateErrorCode, +} from '../../../lib/errors/OperationStateError'; +import ParameterError from '../../../lib/errors/ParameterError'; + +describe('SeaErrorMapping.mapKernelErrorToJsError', () => { + // The 13 kernel ErrorCode variants — kept in sync with src/kernel_error.rs:66-134. + // Tabular driver: each row is (kernel code, expected class, optional extra assertion). 
+ type Case = { + code: KernelErrorCode; + expectedClass: Function; + extra?: (err: Error) => void; + }; + + const cases: Array<Case> = [ + { + code: 'InvalidArgument', + expectedClass: ParameterError, + }, + { + code: 'Unauthenticated', + expectedClass: AuthenticationError, + }, + { + code: 'PermissionDenied', + expectedClass: AuthenticationError, + }, + { + code: 'NotFound', + expectedClass: HiveDriverError, + }, + { + code: 'ResourceExhausted', + expectedClass: HiveDriverError, + }, + { + code: 'Unavailable', + expectedClass: HiveDriverError, + }, + { + code: 'Timeout', + expectedClass: OperationStateError, + extra: (err) => { + expect((err as OperationStateError).errorCode).to.equal(OperationStateErrorCode.Timeout); + }, + }, + { + code: 'Cancelled', + expectedClass: OperationStateError, + extra: (err) => { + expect((err as OperationStateError).errorCode).to.equal(OperationStateErrorCode.Canceled); + }, + }, + { + code: 'DataLoss', + expectedClass: HiveDriverError, + }, + { + code: 'Internal', + expectedClass: HiveDriverError, + }, + { + code: 'InvalidStatementHandle', + expectedClass: HiveDriverError, + }, + { + code: 'NetworkError', + expectedClass: HiveDriverError, + }, + { + code: 'SqlError', + expectedClass: HiveDriverError, + }, + ]; + + it('covers all 13 kernel ErrorCode variants', () => { + // Guardrail: if the kernel adds a variant, KernelErrorCode in TS will gain + // a literal — this test then fails because the new variant has no case row. + // (Drift is caught at the test level since the union itself is an inline literal.) 
+ expect(cases).to.have.lengthOf(13); + }); + + cases.forEach(({ code, expectedClass, extra }) => { + it(`maps ${code} to ${expectedClass.name}`, () => { + const kErr: KernelErrorShape = { + code, + message: `kernel ${code} message`, + }; + + const err = mapKernelErrorToJsError(kErr); + + expect(err).to.be.instanceOf(expectedClass); + expect(err.message).to.equal(`kernel ${code} message`); + if (extra) { + extra(err); + } + }); + }); + + describe('SQLSTATE preservation', () => { + it('attaches sqlState when present on the kernel error', () => { + const err = mapKernelErrorToJsError({ + code: 'SqlError', + message: 'syntax error', + sqlstate: '42000', + }); + + expect(err).to.be.instanceOf(HiveDriverError); + expect(err.sqlState).to.equal('42000'); + }); + + it('does not set sqlState when absent', () => { + const err = mapKernelErrorToJsError({ + code: 'Internal', + message: 'boom', + }); + + expect(err.sqlState).to.be.undefined; + }); + + it('preserves sqlState on AuthenticationError', () => { + const err = mapKernelErrorToJsError({ + code: 'Unauthenticated', + message: 'invalid token', + sqlstate: '28000', + }); + + expect(err).to.be.instanceOf(AuthenticationError); + expect(err.sqlState).to.equal('28000'); + }); + + it('preserves sqlState on OperationStateError', () => { + const err = mapKernelErrorToJsError({ + code: 'Timeout', + message: 'deadline exceeded', + sqlstate: 'HYT01', + }); + + expect(err).to.be.instanceOf(OperationStateError); + expect((err as OperationStateError).errorCode).to.equal(OperationStateErrorCode.Timeout); + expect(err.sqlState).to.equal('HYT01'); + }); + + it('preserves sqlState on ParameterError', () => { + const err = mapKernelErrorToJsError({ + code: 'InvalidArgument', + message: 'bad param', + sqlstate: 'HY009', + }); + + expect(err).to.be.instanceOf(ParameterError); + expect(err.sqlState).to.equal('HY009'); + }); + + it('attaches sqlState as a non-enumerable property', () => { + const err = mapKernelErrorToJsError({ + code: 
'SqlError', + message: 'oops', + sqlstate: '42000', + }); + + const descriptor = Object.getOwnPropertyDescriptor(err, 'sqlState'); + expect(descriptor).to.exist; + expect(descriptor!.enumerable).to.equal(false); + expect(descriptor!.writable).to.equal(true); + expect(descriptor!.configurable).to.equal(true); + }); + }); + + describe('unknown / future kernel codes', () => { + it('falls back to HiveDriverError for an unrecognised code', () => { + const err = mapKernelErrorToJsError({ + code: 'SomeFutureVariantThatDoesNotExist', + message: 'forward-compat message', + }); + + // Never silently drop — must surface as the base driver class. + expect(err).to.be.instanceOf(HiveDriverError); + expect(err.message).to.equal('forward-compat message'); + }); + + it('still preserves sqlState on a fallback HiveDriverError', () => { + const err = mapKernelErrorToJsError({ + code: 'BrandNewVariant', + message: 'with sqlstate', + sqlstate: '01004', + }); + + expect(err).to.be.instanceOf(HiveDriverError); + expect(err.sqlState).to.equal('01004'); + }); + }); + + describe('returned errors compose with try/catch', () => { + it('thrown errors are catchable as Error', () => { + function thrower() { + throw mapKernelErrorToJsError({ code: 'Internal', message: 'kaboom' }); + } + + expect(thrower).to.throw(Error, 'kaboom'); + expect(thrower).to.throw(HiveDriverError, 'kaboom'); + }); + + it('AuthenticationError thrown is also instanceOf HiveDriverError', () => { + // AuthenticationError extends HiveDriverError — preserve that hierarchy. 
+ const err = mapKernelErrorToJsError({ code: 'Unauthenticated', message: 'nope' }); + expect(err).to.be.instanceOf(AuthenticationError); + expect(err).to.be.instanceOf(HiveDriverError); + expect(err).to.be.instanceOf(Error); + }); + + it('ParameterError does NOT extend HiveDriverError (matches existing class hierarchy)', () => { + const err = mapKernelErrorToJsError({ code: 'InvalidArgument', message: 'bad' }); + expect(err).to.be.instanceOf(ParameterError); + expect(err).to.not.be.instanceOf(HiveDriverError); + expect(err).to.be.instanceOf(Error); + }); + }); +}); diff --git a/tests/unit/sea/loader.test.ts b/tests/unit/sea/loader.test.ts new file mode 100644 index 00000000..39bf610f --- /dev/null +++ b/tests/unit/sea/loader.test.ts @@ -0,0 +1,149 @@ +// Copyright (c) 2026 Databricks, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import { expect } from 'chai'; +import { SeaNativeLoader, SeaNativeBinding } from '../../../lib/sea/SeaNativeLoader'; + +// Pure-logic tests for SeaNativeLoader. These exercise the load-failure +// hint branches, the Node-version gate, the shape check, and caching via +// the injectable `load` seam — so they run everywhere regardless of +// whether a real `.node` is installed on the test machine. 
+ +function stubBinding(overrides: Partial<Record<keyof SeaNativeBinding, unknown>> = {}): SeaNativeBinding { + return { + version: () => '1.2.3', + openSession: async () => ({}), + Connection: function Connection() {}, + Statement: function Statement() {}, + ...overrides, + } as unknown as SeaNativeBinding; +} + +function errWithCode(code: string, message: string): NodeJS.ErrnoException { + const err = new Error(message) as NodeJS.ErrnoException; + err.code = code; + return err; +} + +// Capture the message of the error thrown by `fn` (fails the test if +// nothing is thrown). Lets a single failure be asserted against several +// substrings without chai's `.and.to.throw` re-targeting quirk. +function thrownMessage(fn: () => unknown): string { + try { + fn(); + } catch (err) { + return err instanceof Error ? err.message : String(err); + } + return expect.fail('expected the call to throw, but it did not') as never; +} + +describe('SeaNativeLoader', () => { + describe('successful load', () => { + it('get() returns the binding from the injected loader', () => { + const binding = stubBinding(); + const loader = new SeaNativeLoader(() => binding); + expect(loader.get()).to.equal(binding); + expect(loader.tryGet()).to.equal(binding); + }); + + it('caches the result — the load function runs at most once', () => { + let calls = 0; + const binding = stubBinding(); + const loader = new SeaNativeLoader(() => { + calls += 1; + return binding; + }); + loader.get(); + loader.tryGet(); + loader.get(); + expect(calls).to.equal(1); + }); + }); + + describe('load-failure hints', () => { + it('MODULE_NOT_FOUND → "not installed" hint pointing at the README', () => { + const loader = new SeaNativeLoader(() => { + throw errWithCode('MODULE_NOT_FOUND', "Cannot find module '../../native/sea'"); + }); + expect(loader.tryGet()).to.equal(undefined); + const msg = thrownMessage(() => loader.get()); + expect(msg).to.match(/not installed/); + expect(msg).to.match(/README/); + }); + + it('ERR_DLOPEN_FAILED → includes the underlying 
dlerror string and remediation', () => { + const loader = new SeaNativeLoader(() => { + throw errWithCode('ERR_DLOPEN_FAILED', 'GLIBC_2.32 not found'); + }); + const msg = thrownMessage(() => loader.get()); + expect(msg).to.match(/GLIBC_2\.32 not found/); + expect(msg).to.match(/musl/); + expect(msg).to.match(/rm -rf node_modules/); + }); + + it('a generic Error (no code) preserves its message', () => { + const loader = new SeaNativeLoader(() => { + throw new Error('totally unexpected'); + }); + expect(() => loader.get()).to.throw(/totally unexpected/); + }); + + it('a non-Error throw is wrapped', () => { + const loader = new SeaNativeLoader(() => { + // eslint-disable-next-line no-throw-literal + throw 'a string'; + }); + expect(() => loader.get()).to.throw(/non-standard error/); + }); + }); + + describe('shape check', () => { + it('rejects a binding missing an expected export', () => { + const loader = new SeaNativeLoader(() => stubBinding({ openSession: undefined })); + expect(loader.tryGet()).to.equal(undefined); + const msg = thrownMessage(() => loader.get()); + expect(msg).to.match(/missing expected export/); + expect(msg).to.match(/openSession/); + }); + }); + + describe('Node-version gate', () => { + it('fails closed on a Node version below the floor', () => { + const original = process.version; + try { + Object.defineProperty(process, 'version', { value: 'v16.20.0', configurable: true }); + let loadCalled = false; + const loader = new SeaNativeLoader(() => { + loadCalled = true; + return stubBinding(); + }); + expect(() => loader.get()).to.throw(/requires Node >=18/); + expect(loadCalled, 'load() must not be attempted on an unsupported Node').to.equal(false); + } finally { + Object.defineProperty(process, 'version', { value: original, configurable: true }); + } + }); + + it('fails closed when the Node version is unparseable (NaN)', () => { + const original = process.version; + try { + Object.defineProperty(process, 'version', { value: 'vNOT-A-VERSION', 
configurable: true }); + const loader = new SeaNativeLoader(() => stubBinding()); + expect(() => loader.get()).to.throw(/requires Node >=18/); + } finally { + Object.defineProperty(process, 'version', { value: original, configurable: true }); + } + }); + }); +}); diff --git a/tests/unit/sea/version.test.ts b/tests/unit/sea/version.test.ts new file mode 100644 index 00000000..a6c8c1fc --- /dev/null +++ b/tests/unit/sea/version.test.ts @@ -0,0 +1,59 @@ +// Copyright (c) 2026 Databricks, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import { expect } from 'chai'; +import { tryGetSeaNative } from '../../../lib/sea/SeaNativeLoader'; + +// On a CI runner whose triple is supposed to have a published binding +// (M0 = linux-x64-gnu) a missing binding is a hard failure — a silent +// skip there would mask a broken build / packaging regression. On every +// other platform (and on dev machines) the binding is optional, so we +// skip. +function bindingIsExpected(): boolean { + return process.env.CI === 'true' && process.platform === 'linux' && process.arch === 'x64'; +} + +describe('SEA native binding — smoke test', function smoke() { + const binding = tryGetSeaNative(); + + if (binding === undefined) { + if (bindingIsExpected()) { + it('fails loudly: the binding must load on the linux-x64 CI runner', () => { + expect.fail( + 'SEA native binding failed to load on a linux-x64 CI runner where ' + + '@databricks/sql-kernel-linux-x64-gnu is expected. 
Run `npm run build:native` or check packaging.', + ); + }); + return; + } + // Optional dependency absent on this platform — skip rather than fail. + // eslint-disable-next-line no-invalid-this + this.pending = true; + it.skip('SEA native binding not available on this platform'); + return; + } + + it('returns a semver version()', () => { + expect(binding.version()).to.match(/^\d+\.\d+\.\d+$/); + }); + + it('exposes the full binding surface the driver depends on', () => { + // Guards against kernel-side renames: if the kernel drops/renames a + // free function or class, this fails instead of staying green. + expect(binding.version, 'version()').to.be.a('function'); + expect(binding.openSession, 'openSession()').to.be.a('function'); + expect(binding.Connection, 'Connection class').to.be.a('function'); + expect(binding.Statement, 'Statement class').to.be.a('function'); + }); +}); diff --git a/tsconfig.json b/tsconfig.json index 9da406df..767f4166 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -6,7 +6,8 @@ "sourceMap": true, "strict": true, "esModuleInterop": true, - "forceConsistentCasingInFileNames": true + "forceConsistentCasingInFileNames": true, + "baseUrl": "./" }, "exclude": ["./dist/**/*"] }