|
31 | 31 | },
|
32 | 32 | "globs": {
|
33 | 33 | "title": "Globs",
|
| 34 | + "description": "The pattern used to specify which files should be selected from the file system. For more information on glob pattern matching look <a href=\"https://en.wikipedia.org/wiki/Glob_(programming)\">here</a>.", |
34 | 35 | "default": ["**"],
|
35 | 36 | "order": 1,
|
36 |
| - "description": "The pattern used to specify which files should be selected from the file system. For more information on glob pattern matching look <a href=\"https://en.wikipedia.org/wiki/Glob_(programming)\">here</a>.", |
37 | 37 | "type": "array",
|
38 | 38 | "items": {
|
39 | 39 | "type": "string"
|
|
59 | 59 | "primary_key": {
|
60 | 60 | "title": "Primary Key",
|
61 | 61 | "description": "The column or columns (for a composite key) that serves as the unique identifier of a record. If empty, the primary key will default to the parser's default primary key.",
|
62 |
| - "type": "string", |
63 |
| - "airbyte_hidden": true |
| 62 | + "airbyte_hidden": true, |
| 63 | + "type": "string" |
64 | 64 | },
|
65 | 65 | "days_to_sync_if_history_is_full": {
|
66 | 66 | "title": "Days To Sync If History Is Full",
|
|
295 | 295 | "type": "string"
|
296 | 296 | },
|
297 | 297 | "skip_unprocessable_files": {
|
298 |
| - "type": "boolean", |
299 |
| - "default": true, |
300 | 298 | "title": "Skip Unprocessable Files",
|
301 | 299 | "description": "If true, skip files that cannot be parsed and pass the error message along as the _ab_source_file_parse_error field. If false, fail the sync.",
|
302 |
| - "always_show": true |
| 300 | + "default": true, |
| 301 | + "always_show": true, |
| 302 | + "type": "boolean" |
303 | 303 | },
|
304 | 304 | "strategy": {
|
305 |
| - "type": "string", |
| 305 | + "title": "Parsing Strategy", |
| 306 | + "description": "The strategy used to parse documents. `fast` extracts text directly from the document which doesn't work for all files. `ocr_only` is more reliable, but slower. `hi_res` is the most reliable, but requires an API key and a hosted instance of unstructured and can't be used with local mode. See the unstructured.io documentation for more details: https://unstructured-io.github.io/unstructured/core/partition.html#partition-pdf", |
| 307 | + "default": "auto", |
306 | 308 | "always_show": true,
|
307 | 309 | "order": 0,
|
308 |
| - "default": "auto", |
309 |
| - "title": "Parsing Strategy", |
310 | 310 | "enum": ["auto", "fast", "ocr_only", "hi_res"],
|
311 |
| - "description": "The strategy used to parse documents. `fast` extracts text directly from the document which doesn't work for all files. `ocr_only` is more reliable, but slower. `hi_res` is the most reliable, but requires an API key and a hosted instance of unstructured and can't be used with local mode. See the unstructured.io documentation for more details: https://unstructured-io.github.io/unstructured/core/partition.html#partition-pdf" |
| 311 | + "type": "string" |
312 | 312 | },
|
313 | 313 | "processing": {
|
314 | 314 | "title": "Processing",
|
|
346 | 346 | "description": "When enabled, syncs will not validate or structure records against the stream's schema.",
|
347 | 347 | "default": false,
|
348 | 348 | "type": "boolean"
|
| 349 | + }, |
| 350 | + "recent_n_files_to_read_for_schema_discovery": { |
| 351 | + "title": "Files To Read For Schema Discovery", |
| 352 | + "description": "The number of recent files which will be used to discover the schema for this stream.", |
| 353 | + "exclusiveMinimum": 0, |
| 354 | + "type": "integer" |
349 | 355 | }
|
350 | 356 | },
|
351 | 357 | "required": ["name", "format"]
|
|
364 | 370 | "order": 2,
|
365 | 371 | "type": "string"
|
366 | 372 | },
|
| 373 | + "role_arn": { |
| 374 | + "title": "AWS Role ARN", |
| 375 | + "description": "Specifies the Amazon Resource Name (ARN) of an IAM role that you want to use to perform operations requested using this profile. Set the External ID to the Airbyte workspace ID, which can be found in the URL of this page.", |
| 376 | + "order": 6, |
| 377 | + "type": "string" |
| 378 | + }, |
367 | 379 | "aws_secret_access_key": {
|
368 | 380 | "title": "AWS Secret Access Key",
|
369 | 381 | "description": "In order to access private Buckets stored on AWS S3, this connector requires credentials with the proper permissions. If accessing publicly available data, this field is not necessary.",
|
370 | 382 | "airbyte_secret": true,
|
371 | 383 | "order": 3,
|
372 | 384 | "type": "string"
|
373 | 385 | },
|
374 |
| - "role_arn": { |
375 |
| - "title": "AWS Role ARN", |
376 |
| - "description": "Specifies the Amazon Resource Name (ARN) of an IAM role that you want to use to perform operations requested using this profile. Set the External ID to the Airbyte workspace ID, which can be found in the URL of this page.", |
377 |
| - "order": 6, |
378 |
| - "type": "string" |
379 |
| - }, |
380 | 386 | "endpoint": {
|
381 | 387 | "title": "Endpoint",
|
382 | 388 | "description": "Endpoint to an S3 compatible service. Leave empty to use AWS.",
|
|
0 commit comments