|
2 | 2 | # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3 | 3 | #
|
4 | 4 |
|
5 |
| -from dataclasses import InitVar, dataclass |
6 |
| -from typing import Any, List, Mapping, Optional, Tuple |
7 | 5 | import re
|
| 6 | +from dataclasses import InitVar, dataclass |
8 | 7 | from datetime import datetime as dt
|
| 8 | +from typing import Any, List, Mapping, Optional, Tuple |
9 | 9 |
|
10 | 10 | from airbyte_cdk.sources.declarative.schema import JsonFileSchemaLoader
|
11 | 11 | from airbyte_cdk.sources.declarative.transformations import RecordTransformation
|
|
14 | 14 |
|
class ParserError(Exception):
    """Signal that a date/time string could not be parsed.

    Stands in for pendulum's ``ParserError``.
    """
|
18 | 19 |
|
19 | 20 |
|
@@ -48,75 +49,71 @@ def parse(self, text):
|
48 | 49 | Handles various date formats including those with timezone information.
|
49 | 50 | """
|
50 | 51 | # Reject dates with zeros like '0000-00-00' or '0000-00-00 00:00:00'
|
51 |
| - if re.match(r'^0+[-]0+[-]0+', text): |
| 52 | + if re.match(r"^0+[-]0+[-]0+", text): |
52 | 53 | raise ParserError("Zero date not allowed")
|
53 |
| - |
| 54 | + |
54 | 55 | # Comprehensive list of formats to try
|
55 | 56 | formats = [
|
56 | 57 | # Basic formats
|
57 |
| - '%Y-%m-%d', |
58 |
| - '%Y/%m/%d', |
59 |
| - '%d-%m-%Y', |
60 |
| - '%d/%m/%Y', |
61 |
| - |
| 58 | + "%Y-%m-%d", |
| 59 | + "%Y/%m/%d", |
| 60 | + "%d-%m-%Y", |
| 61 | + "%d/%m/%Y", |
62 | 62 | # Date and time formats
|
63 |
| - '%Y-%m-%d %H:%M:%S', |
64 |
| - '%Y-%m-%d %H:%M:%S.%f', |
65 |
| - '%Y/%m/%d %H:%M:%S', |
66 |
| - '%Y/%m/%d %H:%M:%S.%f', |
67 |
| - |
| 63 | + "%Y-%m-%d %H:%M:%S", |
| 64 | + "%Y-%m-%d %H:%M:%S.%f", |
| 65 | + "%Y/%m/%d %H:%M:%S", |
| 66 | + "%Y/%m/%d %H:%M:%S.%f", |
68 | 67 | # ISO formats
|
69 |
| - '%Y-%m-%dT%H:%M:%S', |
70 |
| - '%Y-%m-%dT%H:%M:%S.%f', |
71 |
| - |
| 68 | + "%Y-%m-%dT%H:%M:%S", |
| 69 | + "%Y-%m-%dT%H:%M:%S.%f", |
72 | 70 | # With timezone
|
73 |
| - '%Y-%m-%d %H:%M:%S%z', |
74 |
| - '%Y-%m-%d %H:%M:%S.%f%z', |
75 |
| - '%Y-%m-%dT%H:%M:%S%z', |
76 |
| - '%Y-%m-%dT%H:%M:%S.%f%z', |
77 |
| - |
| 71 | + "%Y-%m-%d %H:%M:%S%z", |
| 72 | + "%Y-%m-%d %H:%M:%S.%f%z", |
| 73 | + "%Y-%m-%dT%H:%M:%S%z", |
| 74 | + "%Y-%m-%dT%H:%M:%S.%f%z", |
78 | 75 | # Using Z for UTC
|
79 |
| - '%Y-%m-%dT%H:%M:%SZ', |
80 |
| - '%Y-%m-%dT%H:%M:%S.%fZ', |
| 76 | + "%Y-%m-%dT%H:%M:%SZ", |
| 77 | + "%Y-%m-%dT%H:%M:%S.%fZ", |
81 | 78 | ]
|
82 | 79 |
|
83 | 80 | # Try parsing with different formats
|
84 | 81 | for fmt in formats:
|
85 | 82 | try:
|
86 | 83 | # Handle 'Z' timezone indicator for UTC
|
87 | 84 | text_to_parse = text
|
88 |
| - if fmt.endswith('Z') and not text.endswith('Z'): |
| 85 | + if fmt.endswith("Z") and not text.endswith("Z"): |
89 | 86 | continue
|
90 |
| - if not fmt.endswith('Z') and text.endswith('Z'): |
| 87 | + if not fmt.endswith("Z") and text.endswith("Z"): |
91 | 88 | text_to_parse = text[:-1] # Remove Z
|
92 |
| - fmt = fmt + 'Z' if 'Z' not in fmt else fmt |
93 |
| - |
| 89 | + fmt = fmt + "Z" if "Z" not in fmt else fmt |
| 90 | + |
94 | 91 | date_obj = dt.strptime(text_to_parse, fmt)
|
95 | 92 | # In pendulum, dates with zero components are rejected
|
96 | 93 | if date_obj.year == 0 or date_obj.month == 0 or date_obj.day == 0:
|
97 | 94 | raise ParserError("Date with zero components")
|
98 | 95 | return date_obj
|
99 | 96 | except ValueError:
|
100 | 97 | continue
|
101 |
| - |
| 98 | + |
102 | 99 | # Try ISO format as a last resort
|
103 | 100 | try:
|
104 | 101 | # Replace Z with +00:00 for ISO format parsing
|
105 |
| - iso_text = text.replace('Z', '+00:00') |
106 |
| - |
| 102 | + iso_text = text.replace("Z", "+00:00") |
| 103 | + |
107 | 104 | # For Python < 3.11 compatibility, truncate fractional seconds longer than 6 digits to microsecond precision
|
108 |
| - microseconds_match = re.search(r'\.(\d{7,})(?=[+-Z]|$)', iso_text) |
| 105 | + microseconds_match = re.search(r"\.(\d{7,})(?=[+-Z]|$)", iso_text) |
109 | 106 | if microseconds_match:
|
110 | 107 | fixed_micro = microseconds_match.group(1)[:6]
|
111 |
| - iso_text = iso_text.replace(microseconds_match.group(0), f'.{fixed_micro}') |
112 |
| - |
| 108 | + iso_text = iso_text.replace(microseconds_match.group(0), f".{fixed_micro}") |
| 109 | + |
113 | 110 | date_obj = dt.fromisoformat(iso_text)
|
114 | 111 | if date_obj.year == 0 or date_obj.month == 0 or date_obj.day == 0:
|
115 | 112 | raise ParserError("Date with zero components")
|
116 | 113 | return date_obj
|
117 | 114 | except (ValueError, AttributeError):
|
118 | 115 | pass
|
119 |
| - |
| 116 | + |
120 | 117 | # If nothing worked, raise the error like pendulum would
|
121 | 118 | raise ParserError(f"Unable to parse: {text}")
|
122 | 119 |
|
|
0 commit comments