From b10a592fc23b9b14138859ea315da60ded767db1 Mon Sep 17 00:00:00 2001 From: samikshya-chand_data Date: Wed, 27 May 2026 20:49:16 +0530 Subject: [PATCH 1/4] feat(telemetry): populate sql_operation, auth_type, and driver_connection_params Aligns the Node.js telemetry payload with the receiver schema (JDBC parity). Several fields were being collected by the producer/aggregator but dropped on the floor in the exporter: - `sql_operation.operation_detail.operation_type` (CREATE_SESSION / DELETE_SESSION / EXECUTE_STATEMENT / LIST_*) on both connection and statement events. - `sql_operation.is_compressed` on statement events when CloudFetch compression observability is available. - `sql_operation.execution_result` now resolves to `FORMAT_UNSPECIFIED` when result-set metadata isn't available (DDL/DML, or SELECTs closed without fetching), so the `sql_operation` block fires on every statement-complete event instead of being skipped. - Top-level `auth_type` from `DriverConfiguration.authType`. - New `driver_connection_params` block carrying `host_info.host_url`, `http_path`, `enable_arrow`, `enable_direct_results`, `socket_timeout`, `enable_metric_view_metadata`, `cloud_fetch_enabled`, `lz4_enabled`, `retry_max_attempts`, `cloud_fetch_concurrent_downloads`. Both `auth_type` and `driver_connection_params` are gated behind the existing authenticated-export guard (same path as `system_configuration`), since `host_url` and `http_path` are workspace-correlated identifiers that must not ship on the unauthenticated endpoint. Co-authored-by: Isaac --- lib/telemetry/DatabricksTelemetryExporter.ts | 52 +++++++++++++++++++- lib/telemetry/telemetryTypeMappers.ts | 4 +- 2 files changed, 53 insertions(+), 3 deletions(-) diff --git a/lib/telemetry/DatabricksTelemetryExporter.ts b/lib/telemetry/DatabricksTelemetryExporter.ts index bfa0f1b1..bd476e14 100644 --- a/lib/telemetry/DatabricksTelemetryExporter.ts +++ b/lib/telemetry/DatabricksTelemetryExporter.ts @@ -60,9 +60,26 @@ interface DatabricksTelemetryLog { char_set_encoding?: string; process_name?: string; }; + auth_type?: string; + driver_connection_params?: { + host_info?: { host_url?: string }; + http_path?: string; + enable_arrow?: boolean; + enable_direct_results?: boolean; + socket_timeout?: number; + enable_metric_view_metadata?: boolean; + cloud_fetch_enabled?: boolean; + lz4_enabled?: boolean; + retry_max_attempts?: number; + cloud_fetch_concurrent_downloads?: number; + }; operation_latency_ms?: number; sql_operation?: { execution_result?: string; + is_compressed?: boolean; + operation_detail?: { + operation_type?: string; + }; chunk_details?: { total_chunks_present?: number; total_chunks_iterated?: number; @@ -368,6 +385,11 @@ export default class DatabricksTelemetryExporter { if (metric.latencyMs !== undefined) { log.entry.sql_driver_log.operation_latency_ms = metric.latencyMs; } + if (metric.operationType) { + log.entry.sql_driver_log.sql_operation = { + operation_detail: { operation_type: metric.operationType }, + }; + } if (metric.driverConfig && includeCorrelation) { // system_configuration is a high-entropy client fingerprint (OS, arch, // locale, process, runtime). Only ship on the authenticated path. @@ -384,15 +406,43 @@ export default class DatabricksTelemetryExporter { char_set_encoding: metric.driverConfig.charSetEncoding, process_name: sanitizeProcessName(metric.driverConfig.processName) || undefined, }; + + // auth_type and host/http-path are workspace-correlated, so they ride + // the same auth-only path as system_configuration. + if (metric.driverConfig.authType) { + log.entry.sql_driver_log.auth_type = metric.driverConfig.authType; + } + log.entry.sql_driver_log.driver_connection_params = { + host_info: { host_url: this.host }, + http_path: metric.driverConfig.httpPath, + enable_arrow: metric.driverConfig.arrowEnabled, + enable_direct_results: metric.driverConfig.directResultsEnabled, + socket_timeout: metric.driverConfig.socketTimeout, + enable_metric_view_metadata: metric.driverConfig.enableMetricViewMetadata, + cloud_fetch_enabled: metric.driverConfig.cloudFetchEnabled, + lz4_enabled: metric.driverConfig.lz4Enabled, + retry_max_attempts: metric.driverConfig.retryMaxAttempts, + cloud_fetch_concurrent_downloads: metric.driverConfig.cloudFetchConcurrentDownloads, + }; } } else if (metric.metricType === 'statement') { log.entry.sql_driver_log.operation_latency_ms = metric.latencyMs; - if (metric.resultFormat || metric.chunkCount) { + if (metric.resultFormat || metric.chunkCount || metric.operationType || metric.compressed !== undefined) { log.entry.sql_driver_log.sql_operation = { execution_result: metric.resultFormat, }; + if (metric.compressed !== undefined) { + log.entry.sql_driver_log.sql_operation.is_compressed = metric.compressed; + } + + if (metric.operationType) { + log.entry.sql_driver_log.sql_operation.operation_detail = { + operation_type: metric.operationType, + }; + } + if ((metric.chunkCount ?? 0) > 0) { log.entry.sql_driver_log.sql_operation.chunk_details = { total_chunks_present: metric.chunkCount, diff --git a/lib/telemetry/telemetryTypeMappers.ts b/lib/telemetry/telemetryTypeMappers.ts index d022739d..1cd6caeb 100644 --- a/lib/telemetry/telemetryTypeMappers.ts +++ b/lib/telemetry/telemetryTypeMappers.ts @@ -51,9 +51,9 @@ export function mapOperationTypeToTelemetryType(operationType?: TOperationType): /** * Map Thrift TSparkRowSetType to telemetry ExecutionResult.Format enum string. */ -export function mapResultFormatToTelemetryType(resultFormat?: TSparkRowSetType): string | undefined { +export function mapResultFormatToTelemetryType(resultFormat?: TSparkRowSetType): string { if (resultFormat === undefined) { - return undefined; + return 'FORMAT_UNSPECIFIED'; } switch (resultFormat) { From 05001b78d3352857da77eef8b2d4f84091869bad Mon Sep 17 00:00:00 2001 From: samikshya-chand_data Date: Wed, 27 May 2026 20:53:02 +0530 Subject: [PATCH 2/4] telemetry: restrict driver_connection_params to proto-defined fields Drop four Node-specific fields that aren't in the receiver's `DriverConnectionParameters` proto schema: - cloud_fetch_enabled - lz4_enabled - retry_max_attempts - cloud_fetch_concurrent_downloads These would be ignored at deserialization on the receiver side anyway. Remaining block contains only proto-defined fields: `host_info`, `http_path`, `enable_arrow`, `enable_direct_results`, `socket_timeout`, `enable_metric_view_metadata`. Co-authored-by: Isaac --- lib/telemetry/DatabricksTelemetryExporter.ts | 8 -------- 1 file changed, 8 deletions(-) diff --git a/lib/telemetry/DatabricksTelemetryExporter.ts b/lib/telemetry/DatabricksTelemetryExporter.ts index bd476e14..642cc0f5 100644 --- a/lib/telemetry/DatabricksTelemetryExporter.ts +++ b/lib/telemetry/DatabricksTelemetryExporter.ts @@ -68,10 +68,6 @@ interface DatabricksTelemetryLog { enable_direct_results?: boolean; socket_timeout?: number; enable_metric_view_metadata?: boolean; - cloud_fetch_enabled?: boolean; - lz4_enabled?: boolean; - retry_max_attempts?: number; - cloud_fetch_concurrent_downloads?: number; }; operation_latency_ms?: number; sql_operation?: { @@ -419,10 +415,6 @@ export default class DatabricksTelemetryExporter { enable_direct_results: metric.driverConfig.directResultsEnabled, socket_timeout: metric.driverConfig.socketTimeout, enable_metric_view_metadata: metric.driverConfig.enableMetricViewMetadata, - cloud_fetch_enabled: metric.driverConfig.cloudFetchEnabled, - lz4_enabled: metric.driverConfig.lz4Enabled, - retry_max_attempts: metric.driverConfig.retryMaxAttempts, - cloud_fetch_concurrent_downloads: metric.driverConfig.cloudFetchConcurrentDownloads, }; } } else if (metric.metricType === 'statement') { From a854d8dad93cf649ebd25b2d638f68f504334d2f Mon Sep 17 00:00:00 2001 From: samikshya-chand_data Date: Wed, 27 May 2026 21:03:32 +0530 Subject: [PATCH 3/4] telemetry: populate mode and use_proxy in driver_connection_params MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The four Node-specific fields dropped in the previous commit have no proto equivalent (CloudFetch enablement, LZ4, retry config, concurrent downloads aren't tracked at the receiver). But two unrelated proto fields can be filled from existing Node config: - `mode = "THRIFT"` — Node.js always uses Thrift transport (no SEA client). JDBC's `DatabricksClientType` enum is `SEA | THRIFT`. - `use_proxy` — derived from `ConnectionOptions.proxy`. Stored on DBSQLClient at `connect()` time alongside the existing host / httpPath / authType fields, then surfaced through a new `DriverConfiguration.useProxy` field. Co-authored-by: Isaac --- lib/DBSQLClient.ts | 4 ++++ lib/telemetry/DatabricksTelemetryExporter.ts | 4 ++++ lib/telemetry/types.ts | 3 +++ 3 files changed, 11 insertions(+) diff --git a/lib/DBSQLClient.ts b/lib/DBSQLClient.ts index b35e0d41..0e67366d 100644 --- a/lib/DBSQLClient.ts +++ b/lib/DBSQLClient.ts @@ -122,6 +122,8 @@ export default class DBSQLClient extends EventEmitter implements IDBSQLClient, I private authType?: string; + private useProxy?: boolean; + private telemetryClient?: TelemetryClient; private telemetryEmitter?: TelemetryEventEmitter; @@ -425,6 +427,7 @@ export default class DBSQLClient extends EventEmitter implements IDBSQLClient, I // Connection parameters httpPath: this.httpPath, enableMetricViewMetadata: this.config.enableMetricViewMetadata, + useProxy: this.useProxy, }; } @@ -604,6 +607,7 @@ export default class DBSQLClient extends EventEmitter implements IDBSQLClient, I this.host = options.host; this.httpPath = options.path; this.authType = this.mapAuthType(options); + this.useProxy = Boolean(options.proxy); // Store enableMetricViewMetadata configuration if (options.enableMetricViewMetadata !== undefined) { diff --git a/lib/telemetry/DatabricksTelemetryExporter.ts b/lib/telemetry/DatabricksTelemetryExporter.ts index 642cc0f5..ca9f04cf 100644 --- a/lib/telemetry/DatabricksTelemetryExporter.ts +++ b/lib/telemetry/DatabricksTelemetryExporter.ts @@ -64,6 +64,8 @@ interface DatabricksTelemetryLog { driver_connection_params?: { host_info?: { host_url?: string }; http_path?: string; + mode?: string; + use_proxy?: boolean; enable_arrow?: boolean; enable_direct_results?: boolean; socket_timeout?: number; @@ -411,6 +413,8 @@ export default class DatabricksTelemetryExporter { log.entry.sql_driver_log.driver_connection_params = { host_info: { host_url: this.host }, http_path: metric.driverConfig.httpPath, + mode: 'THRIFT', + use_proxy: metric.driverConfig.useProxy, enable_arrow: metric.driverConfig.arrowEnabled, enable_direct_results: metric.driverConfig.directResultsEnabled, socket_timeout: metric.driverConfig.socketTimeout, diff --git a/lib/telemetry/types.ts b/lib/telemetry/types.ts index 9a422258..f32f0e36 100644 --- a/lib/telemetry/types.ts +++ b/lib/telemetry/types.ts @@ -304,6 +304,9 @@ export interface DriverConfiguration { /** Whether metric view metadata is enabled */ enableMetricViewMetadata?: boolean; + + /** Whether an HTTP/SOCKS proxy is configured on the connection */ + useProxy?: boolean; } /** From e04674f29ae93c9fc0844a6199ac54c6cb659b16 Mon Sep 17 00:00:00 2001 From: samikshya-chand_data Date: Tue, 2 Jun 2026 18:31:55 +0530 Subject: [PATCH 4/4] telemetry: emit socket_timeout in seconds to match receiver proto The driver tracks socketTimeout in milliseconds, but the receiver proto field socket_timeout is defined in seconds. Convert ms -> s at export so a 15-minute timeout reports as 900s instead of 900000s. Adds regression tests covering the default value and sub-second rounding. Co-authored-by: Isaac --- lib/telemetry/DatabricksTelemetryExporter.ts | 8 +++- .../DatabricksTelemetryExporter.test.ts | 45 +++++++++++++++++++ 2 files changed, 52 insertions(+), 1 deletion(-) diff --git a/lib/telemetry/DatabricksTelemetryExporter.ts b/lib/telemetry/DatabricksTelemetryExporter.ts index ca9f04cf..4978a969 100644 --- a/lib/telemetry/DatabricksTelemetryExporter.ts +++ b/lib/telemetry/DatabricksTelemetryExporter.ts @@ -417,7 +417,13 @@ export default class DatabricksTelemetryExporter { use_proxy: metric.driverConfig.useProxy, enable_arrow: metric.driverConfig.arrowEnabled, enable_direct_results: metric.driverConfig.directResultsEnabled, - socket_timeout: metric.driverConfig.socketTimeout, + // The proto `socket_timeout` field is defined in seconds, but the driver + // tracks socketTimeout in milliseconds — convert so the receiver records + // the correct unit (e.g. 900000ms -> 900s) instead of treating ms as seconds. + socket_timeout: + typeof metric.driverConfig.socketTimeout === 'number' + ? Math.round(metric.driverConfig.socketTimeout / 1000) + : metric.driverConfig.socketTimeout, enable_metric_view_metadata: metric.driverConfig.enableMetricViewMetadata, }; } diff --git a/tests/unit/telemetry/DatabricksTelemetryExporter.test.ts b/tests/unit/telemetry/DatabricksTelemetryExporter.test.ts index c3fd099d..b9afa216 100644 --- a/tests/unit/telemetry/DatabricksTelemetryExporter.test.ts +++ b/tests/unit/telemetry/DatabricksTelemetryExporter.test.ts @@ -162,6 +162,51 @@ describe('DatabricksTelemetryExporter', () => { }); }); + describe('export() - driver_connection_params', () => { + // The driver tracks socketTimeout in milliseconds, but the receiver proto + // defines `socket_timeout` in seconds. Lock in the ms -> s conversion so a + // 15-minute (900000ms) timeout is reported as 900s, not 900000s. + function getConnectionParams(sendRequestStub: sinon.SinonStub): any { + const init = sendRequestStub.firstCall.args[1] as { body: string }; + const body = JSON.parse(init.body); + const log = JSON.parse(body.protoLogs[0]); + return log.entry.sql_driver_log.driver_connection_params; + } + + function makeConnectionMetric(socketTimeout: number): TelemetryMetric { + return makeMetric({ + metricType: 'connection', + driverConfig: { + driverName: 'nodejs-sql-driver', + driverVersion: '1.14.0', + socketTimeout, + } as any, + }); + } + + it('converts socketTimeout from milliseconds to seconds', async () => { + const context = new ClientContextStub({ telemetryAuthenticatedExport: true } as any); + const registry = new CircuitBreakerRegistry(context); + const exporter = new DatabricksTelemetryExporter(context, 'host.example.com', registry, fakeAuthProvider); + const sendRequestStub = sinon.stub(exporter as any, 'sendRequest').returns(makeOkResponse()); + + await exporter.export([makeConnectionMetric(900000)]); + + expect(getConnectionParams(sendRequestStub).socket_timeout).to.equal(900); + }); + + it('rounds sub-second socketTimeout values', async () => { + const context = new ClientContextStub({ telemetryAuthenticatedExport: true } as any); + const registry = new CircuitBreakerRegistry(context); + const exporter = new DatabricksTelemetryExporter(context, 'host.example.com', registry, fakeAuthProvider); + const sendRequestStub = sinon.stub(exporter as any, 'sendRequest').returns(makeOkResponse()); + + await exporter.export([makeConnectionMetric(1500)]); + + expect(getConnectionParams(sendRequestStub).socket_timeout).to.equal(2); + }); + }); + describe('export() - retry logic', () => { it('should retry on retryable HTTP errors (503)', async () => { const context = new ClientContextStub({ telemetryMaxRetries: 2 } as any);