4
4
5
5
from dataclasses import InitVar , dataclass
6
6
from typing import Any , List , Mapping , Optional , Tuple
7
-
8
- import pendulum
9
- from pendulum .parsing .exceptions import ParserError
7
+ import re
8
+ from datetime import datetime as dt
10
9
11
10
from airbyte_cdk .sources .declarative .schema import JsonFileSchemaLoader
12
11
from airbyte_cdk .sources .declarative .transformations import RecordTransformation
13
12
from airbyte_cdk .sources .declarative .types import Config , Record , StreamSlice , StreamState
14
13
15
14
15
class ParserError(Exception):
    """Signal that a date/time string could not be parsed.

    Stands in for pendulum's ``ParserError`` so that existing
    ``except ParserError`` handlers keep working without pendulum.
    """
18
+
19
+
16
20
@dataclass
17
21
class CustomFieldTransformation (RecordTransformation ):
18
22
"""
@@ -21,11 +25,17 @@ class CustomFieldTransformation(RecordTransformation):
21
25
22
26
config : Config
23
27
parameters : InitVar [Mapping [str , Any ]]
28
+ cursor_datetime_formats : List [str ] = None
29
+ datetime_format : str = None
24
30
25
31
def __post_init__(self, parameters: Mapping[str, Any]):
    """Finish initialisation once the dataclass fields have been assigned.

    Args:
        parameters: Component parameters. Reads ``name`` and, as fallbacks
            for the corresponding fields, ``cursor_datetime_formats`` and
            ``datetime_format``.
    """
    # self.name must be set first: _get_schema_root_properties passes it to the schema loader.
    self.name = parameters.get("name")
    self._schema = self._get_schema_root_properties()
    # Names of the schema properties whose "format" is "date" or "date-time".
    self._date_and_date_time_fields = self._get_fields_with_property_formats_from_schema(("date", "date-time"))

    # Resolve the datetime formats. A value given directly on the dataclass
    # field is kept; otherwise fall back to `parameters`, and finally to the
    # built-in default. Missing and explicit-None entries are treated alike
    # so that parse() never receives None where it expects a list.
    default_format = "%Y-%m-%d %H:%M:%S"
    if not self.cursor_datetime_formats:
        self.cursor_datetime_formats = parameters.get("cursor_datetime_formats") or [default_format]
    if not self.datetime_format:
        self.datetime_format = parameters.get("datetime_format") or default_format
29
39
30
40
def _get_schema_root_properties (self ):
31
41
schema_loader = JsonFileSchemaLoader (config = self .config , parameters = {"name" : self .name })
@@ -38,6 +48,89 @@ def _get_fields_with_property_formats_from_schema(self, property_formats: Tuple[
38
48
"""
39
49
return [k for k , v in self ._schema .items () if v .get ("format" ) in property_formats ]
40
50
51
def parse(self, text):
    """Parse a date or datetime string, standing in for ``pendulum.parse``.

    A fixed list of common ``strptime`` formats is tried first, followed by
    the formats configured in ``self.cursor_datetime_formats`` and
    ``self.datetime_format``; ``datetime.fromisoformat`` is the last resort.

    A trailing ``Z`` is interpreted as UTC: formats without a zone directive
    get ``%z`` appended for such input, so the result is timezone-aware
    instead of silently losing the designator.

    Args:
        text: The date/datetime string to parse.

    Returns:
        A ``datetime``; timezone-aware when ``text`` carries an offset or a
        trailing ``Z``, naive otherwise.

    Raises:
        ParserError: If ``text`` is an all-zero date or matches no format.
    """
    # Reject dates with zeros like '0000-00-00' or '0000-00-00 00:00:00'
    if re.match(r'^0+[-]0+[-]0+', text):
        raise ParserError("Zero date not allowed")

    formats = [
        # Basic formats
        '%Y-%m-%d',
        '%Y/%m/%d',
        '%d-%m-%Y',
        '%d/%m/%Y',

        # Date and time formats
        '%Y-%m-%d %H:%M:%S',
        '%Y-%m-%d %H:%M:%S.%f',
        '%Y/%m/%d %H:%M:%S',
        '%Y/%m/%d %H:%M:%S.%f',

        # ISO formats
        '%Y-%m-%dT%H:%M:%S',
        '%Y-%m-%dT%H:%M:%S.%f',

        # With timezone
        '%Y-%m-%d %H:%M:%S%z',
        '%Y-%m-%d %H:%M:%S.%f%z',
        '%Y-%m-%dT%H:%M:%S%z',
        '%Y-%m-%dT%H:%M:%S.%f%z',

        # Using Z for UTC
        '%Y-%m-%dT%H:%M:%SZ',
        '%Y-%m-%dT%H:%M:%S.%fZ',
    ]

    # Add the configured formats; the attribute may legitimately be None.
    for extra in self.cursor_datetime_formats or []:
        if extra not in formats:
            formats.append(extra)
    if self.datetime_format and self.datetime_format not in formats:
        formats.append(self.datetime_format)

    has_utc_suffix = text.endswith('Z')
    for fmt in formats:
        # strptime's %z accepts 'Z' as UTC, so let it consume the designator
        # rather than stripping it (which would discard the timezone).
        # Formats that already end in 'Z'/'%Z' or contain %z are used as-is.
        if has_utc_suffix and '%z' not in fmt and not fmt.endswith('Z'):
            fmt = fmt + '%z'
        try:
            # A datetime cannot hold a zero year/month/day: strptime raises
            # ValueError for those, so no extra zero check is needed here.
            return dt.strptime(text, fmt)
        except ValueError:
            continue

    # Try ISO format as a last resort
    # Replace Z with +00:00 for ISO format parsing
    iso_text = text.replace('Z', '+00:00')

    # For Python < 3.11 compatibility, cut fractional seconds longer than
    # six digits down to microsecond precision.
    fraction = re.search(r'\.(\d{7,})(?=[-+Z]|$)', iso_text)
    if fraction:
        iso_text = iso_text[:fraction.start()] + '.' + fraction.group(1)[:6] + iso_text[fraction.end():]

    try:
        return dt.fromisoformat(iso_text)
    except (ValueError, AttributeError):
        # ValueError: not ISO either. AttributeError: fromisoformat is
        # unavailable on very old interpreters. Both mean "unparseable".
        pass

    # If nothing worked, raise the error like pendulum would
    raise ParserError(f"Unable to parse: {text}")
133
+
41
134
def transform (
42
135
self ,
43
136
record : Record ,
@@ -48,7 +141,7 @@ def transform(
48
141
for item in record :
49
142
if item in self ._date_and_date_time_fields and record .get (item ):
50
143
try :
51
- pendulum .parse (record [item ])
144
+ self .parse (record [item ])
52
145
except ParserError :
53
146
record [item ] = None
54
147
return record
0 commit comments