Skip to content

Commit 9f434e7

Browse files
committed
update components.py without using pendulum
1 parent c78f588 commit 9f434e7

File tree

1 file changed

+97
-4
lines changed

1 file changed

+97
-4
lines changed

airbyte-integrations/connectors/source-prestashop/components.py

+97-4
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,19 @@
44

55
from dataclasses import InitVar, dataclass
66
from typing import Any, List, Mapping, Optional, Tuple
7-
8-
import pendulum
9-
from pendulum.parsing.exceptions import ParserError
7+
import re
8+
from datetime import datetime as dt
109

1110
from airbyte_cdk.sources.declarative.schema import JsonFileSchemaLoader
1211
from airbyte_cdk.sources.declarative.transformations import RecordTransformation
1312
from airbyte_cdk.sources.declarative.types import Config, Record, StreamSlice, StreamState
1413

1514

15+
class ParserError(Exception):
    """Raised when a date string cannot be parsed; stands in for pendulum's ParserError."""
18+
19+
1620
@dataclass
1721
class CustomFieldTransformation(RecordTransformation):
1822
"""
@@ -21,11 +25,17 @@ class CustomFieldTransformation(RecordTransformation):
2125

2226
config: Config
2327
parameters: InitVar[Mapping[str, Any]]
28+
cursor_datetime_formats: List[str] = None
29+
datetime_format: str = None
2430

2531
def __post_init__(self, parameters: Mapping[str, Any]):
    """
    Finish dataclass initialisation from the component ``parameters``.

    Reads the stream name from ``parameters``, loads the schema's root
    properties, and records which fields are declared with the ``date`` or
    ``date-time`` format.

    ``cursor_datetime_formats`` and ``datetime_format`` are dataclass fields
    that default to ``None``. A value passed to the constructor is kept;
    only an unset field is filled from ``parameters``, falling back to
    ``%Y-%m-%d %H:%M:%S``.

    :param parameters: component parameters; the keys read here are ``name``,
        ``cursor_datetime_formats`` and ``datetime_format``
    """
    self.name = parameters.get("name")
    self._schema = self._get_schema_root_properties()
    self._date_and_date_time_fields = self._get_fields_with_property_formats_from_schema(("date", "date-time"))

    # Do not clobber values supplied explicitly to the constructor: the
    # parameters mapping (then the built-in default) only fills in the gaps.
    if self.cursor_datetime_formats is None:
        self.cursor_datetime_formats = parameters.get("cursor_datetime_formats", ["%Y-%m-%d %H:%M:%S"])
    if self.datetime_format is None:
        self.datetime_format = parameters.get("datetime_format", "%Y-%m-%d %H:%M:%S")
2939

3040
def _get_schema_root_properties(self):
3141
schema_loader = JsonFileSchemaLoader(config=self.config, parameters={"name": self.name})
@@ -38,6 +48,89 @@ def _get_fields_with_property_formats_from_schema(self, property_formats: Tuple[
3848
"""
3949
return [k for k, v in self._schema.items() if v.get("format") in property_formats]
4050

51+
def parse(self, text):
    """
    Parse a date or datetime string, standing in for ``pendulum.parse``.

    Candidate ``strptime`` formats are tried in order: a built-in list, then
    ``self.cursor_datetime_formats``, then ``self.datetime_format``. If none
    of them matches, ``datetime.fromisoformat`` is attempted as a last resort.

    :param text: the date/datetime string to parse
    :return: a ``datetime``; timezone-aware when the text carries a UTC offset
        or a trailing ``Z`` (treated as UTC), naive otherwise
    :raises ParserError: for all-zero dates such as ``0000-00-00`` and for
        text that matches none of the known formats
    """
    # Function-scope import so the file's top-level import block stays untouched.
    from datetime import timezone

    # Reject dates with zeros like '0000-00-00' or '0000-00-00 00:00:00'
    if re.match(r'^0+[-]0+[-]0+', text):
        raise ParserError("Zero date not allowed")

    formats = [
        # Basic formats
        '%Y-%m-%d',
        '%Y/%m/%d',
        '%d-%m-%Y',
        '%d/%m/%Y',
        # Date and time formats
        '%Y-%m-%d %H:%M:%S',
        '%Y-%m-%d %H:%M:%S.%f',
        '%Y/%m/%d %H:%M:%S',
        '%Y/%m/%d %H:%M:%S.%f',
        # ISO formats
        '%Y-%m-%dT%H:%M:%S',
        '%Y-%m-%dT%H:%M:%S.%f',
        # With timezone (on Python 3.7+ ``%z`` also accepts a literal 'Z')
        '%Y-%m-%d %H:%M:%S%z',
        '%Y-%m-%d %H:%M:%S.%f%z',
        '%Y-%m-%dT%H:%M:%S%z',
        '%Y-%m-%dT%H:%M:%S.%f%z',
        # Using a literal Z for UTC
        '%Y-%m-%dT%H:%M:%SZ',
        '%Y-%m-%dT%H:%M:%S.%fZ',
    ]

    # Add the configured formats; tolerate an unset (None) list.
    formats.extend(self.cursor_datetime_formats or [])
    if self.datetime_format and self.datetime_format not in formats:
        formats.append(self.datetime_format)

    for fmt in formats:
        try:
            parsed = dt.strptime(text, fmt)
        except ValueError:
            continue
        # A format ending in a literal 'Z' (not the %Z directive) matched a
        # UTC designator, so keep that information instead of returning naive.
        if fmt.endswith('Z') and not fmt.endswith('%Z') and parsed.tzinfo is None:
            parsed = parsed.replace(tzinfo=timezone.utc)
        return parsed

    # Last resort: ISO 8601 parsing. Older ``fromisoformat`` versions accept
    # neither a trailing 'Z' nor more than six fractional-second digits.
    iso_text = text[:-1] + '+00:00' if text.endswith('Z') else text
    iso_text = re.sub(r'(\.\d{6})\d+(?=[+-]|$)', r'\1', iso_text)
    try:
        return dt.fromisoformat(iso_text)
    except (ValueError, AttributeError):
        # AttributeError: ``fromisoformat`` is missing on very old interpreters.
        pass

    # If nothing worked, raise the error like pendulum would
    raise ParserError(f"Unable to parse: {text}")
133+
41134
def transform(
42135
self,
43136
record: Record,
@@ -48,7 +141,7 @@ def transform(
48141
for item in record:
49142
if item in self._date_and_date_time_fields and record.get(item):
50143
try:
51-
pendulum.parse(record[item])
144+
self.parse(record[item])
52145
except ParserError:
53146
record[item] = None
54147
return record

0 commit comments

Comments
 (0)