Skip to content

Commit 9c176c0

Browse files
authored
Enhancements for Gremlin HTTP (#624)
* Gremlin HTTP enhancements * remove unused import * update changelog
1 parent 35a3a51 commit 9c176c0

File tree

5 files changed

+58
-21
lines changed

5 files changed

+58
-21
lines changed

ChangeLog.md

+1
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ Starting with v1.31.6, this file will contain a record of major features and upd
88

99
- Added `--connection-protocol` option to `%%gremlin` ([Link to PR](https://github.com/aws/graph-notebook/pull/617))
1010
- Added global Gremlin `connection_protocol` setting to `%%graph_notebook_config` ([Link to PR](https://github.com/aws/graph-notebook/pull/621))
11+
- Added various enhancements for `%%gremlin` HTTP connections to Neptune ([Link to PR](https://github.com/aws/graph-notebook/pull/624))
1112
- Restored left alignment of numeric value columns in results table widget ([Link to PR](https://github.com/aws/graph-notebook/pull/620))
1213

1314
## Release 4.4.0 (June 10, 2024)

src/graph_notebook/configuration/get_config.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
SparqlSection, GremlinSection, Neo4JSection
1010
from graph_notebook.neptune.client import NEPTUNE_CONFIG_HOST_IDENTIFIERS, is_allowed_neptune_host, false_str_variants, \
1111
DEFAULT_NEO4J_USERNAME, DEFAULT_NEO4J_PASSWORD, DEFAULT_NEO4J_DATABASE, \
12-
NEPTUNE_DB_SERVICE_NAME, NEPTUNE_ANALYTICS_SERVICE_NAME, NEPTUNE_DB_CONFIG_NAMES, NEPTUNE_ANALYTICS_CONFIG_NAMES
12+
NEPTUNE_DB_SERVICE_NAME, DEFAULT_WS_PROTOCOL, DEFAULT_HTTP_PROTOCOL
1313

1414
neptune_params = ['neptune_service', 'auth_mode', 'load_from_s3_arn', 'aws_region']
1515
neptune_gremlin_params = ['connection_protocol']
@@ -33,11 +33,15 @@ def get_config_from_dict(data: dict, neptune_hosts: list = NEPTUNE_CONFIG_HOST_I
3333
neptune_service = data['neptune_service'] if 'neptune_service' in data else NEPTUNE_DB_SERVICE_NAME
3434
if 'gremlin' in data:
3535
data['gremlin']['include_protocol'] = True
36+
if 'connection_protocol' not in data['gremlin']:
37+
data['gremlin']['connection_protocol'] = DEFAULT_WS_PROTOCOL \
38+
if neptune_service == NEPTUNE_DB_SERVICE_NAME else DEFAULT_HTTP_PROTOCOL
3639
gremlin_section = GremlinSection(**data['gremlin'])
3740
if gremlin_section.to_dict()['traversal_source'] != 'g':
3841
print('Ignoring custom traversal source, Amazon Neptune does not support this functionality.\n')
3942
else:
40-
gremlin_section = GremlinSection(include_protocol=True)
43+
protocol = DEFAULT_WS_PROTOCOL if neptune_service == NEPTUNE_DB_SERVICE_NAME else DEFAULT_HTTP_PROTOCOL
44+
gremlin_section = GremlinSection(include_protocol=True, connection_protocol=protocol)
4145
if neo4j_section.to_dict()['username'] != DEFAULT_NEO4J_USERNAME \
4246
or neo4j_section.to_dict()['password'] != DEFAULT_NEO4J_PASSWORD:
4347
print('Ignoring Neo4J custom authentication, Amazon Neptune does not support this functionality.\n')

src/graph_notebook/magics/graph_magic.py

+13-8
Original file line numberDiff line numberDiff line change
@@ -44,15 +44,16 @@
4444
neptune_db_only, neptune_graph_only
4545
from graph_notebook.magics.ml import neptune_ml_magic_handler, generate_neptune_ml_parser
4646
from graph_notebook.magics.streams import StreamViewer
47-
from graph_notebook.neptune.client import ClientBuilder, Client, PARALLELISM_OPTIONS, PARALLELISM_HIGH, \
47+
from graph_notebook.neptune.client import (ClientBuilder, Client, PARALLELISM_OPTIONS, PARALLELISM_HIGH, \
4848
LOAD_JOB_MODES, MODE_AUTO, FINAL_LOAD_STATUSES, SPARQL_ACTION, FORMAT_CSV, FORMAT_OPENCYPHER, FORMAT_NTRIPLE, \
4949
DB_LOAD_TYPES, ANALYTICS_LOAD_TYPES, VALID_BULK_FORMATS, VALID_INCREMENTAL_FORMATS, \
5050
FORMAT_NQUADS, FORMAT_RDFXML, FORMAT_TURTLE, STREAM_RDF, STREAM_PG, STREAM_ENDPOINTS, \
5151
NEPTUNE_CONFIG_HOST_IDENTIFIERS, is_allowed_neptune_host, \
5252
STATISTICS_LANGUAGE_INPUTS, STATISTICS_LANGUAGE_INPUTS_SPARQL, STATISTICS_MODES, SUMMARY_MODES, \
53-
SPARQL_EXPLAIN_MODES, OPENCYPHER_EXPLAIN_MODES, OPENCYPHER_PLAN_CACHE_MODES, OPENCYPHER_DEFAULT_TIMEOUT, \
54-
OPENCYPHER_STATUS_STATE_MODES, normalize_service_name, GRAPH_PG_INFO_METRICS, \
55-
DEFAULT_GREMLIN_PROTOCOL, GREMLIN_PROTOCOL_FORMATS, DEFAULT_HTTP_PROTOCOL, normalize_protocol_name
53+
SPARQL_EXPLAIN_MODES, OPENCYPHER_EXPLAIN_MODES, GREMLIN_EXPLAIN_MODES, \
54+
OPENCYPHER_PLAN_CACHE_MODES, OPENCYPHER_DEFAULT_TIMEOUT, OPENCYPHER_STATUS_STATE_MODES, \
55+
normalize_service_name, GRAPH_PG_INFO_METRICS, \
56+
DEFAULT_GREMLIN_PROTOCOL, GREMLIN_PROTOCOL_FORMATS, DEFAULT_HTTP_PROTOCOL, normalize_protocol_name)
5657
from graph_notebook.network import SPARQLNetwork
5758
from graph_notebook.network.gremlin.GremlinNetwork import parse_pattern_list_str, GremlinNetwork
5859
from graph_notebook.visualization.rows_and_columns import sparql_get_rows_and_columns, opencypher_get_rows_and_columns
@@ -534,7 +535,7 @@ def stream_viewer(self, line):
534535

535536
language = args.language
536537
limit = args.limit
537-
uri = self.client.get_uri_with_port()
538+
uri = self.client.get_uri(include_port=True)
538539
viewer = StreamViewer(self.client, uri, language, limit=limit)
539540
viewer.show()
540541

@@ -1034,8 +1035,9 @@ def gremlin(self, line, cell, local_ns: dict = None):
10341035
f'If not specified, defaults to the value of the gremlin.connection_protocol field '
10351036
f'in %graph_notebook_config. Please note that this option has no effect on the '
10361037
f'Profile and Explain modes, which must use HTTP.')
1037-
parser.add_argument('--explain-type', type=str.lower, default='',
1038-
help='Explain mode to use when using the explain query mode.')
1038+
parser.add_argument('--explain-type', type=str.lower, default='dynamic',
1039+
help=f'Explain mode to use when using the explain query mode. '
1040+
f'Accepted values: {GREMLIN_EXPLAIN_MODES}')
10391041
parser.add_argument('-p', '--path-pattern', default='', help='path pattern')
10401042
parser.add_argument('-g', '--group-by', type=str, default='',
10411043
help='Property used to group nodes (e.g. code, T.region) default is T.label')
@@ -1074,6 +1076,8 @@ def gremlin(self, line, cell, local_ns: dict = None):
10741076
'TinkerPop driver "Serializers" enum values. Default is GRAPHSON_V3_UNTYPED')
10751077
parser.add_argument('--profile-indexOps', action='store_true', default=False,
10761078
help='Show a detailed report of all index operations.')
1079+
parser.add_argument('--profile-debug', action='store_true', default=False,
1080+
help='Enable debug mode.')
10771081
parser.add_argument('--profile-misc-args', type=str, default='{}',
10781082
help='Additional profile options, passed in as a map.')
10791083
parser.add_argument('-sp', '--stop-physics', action='store_true', default=False,
@@ -1154,7 +1158,8 @@ def gremlin(self, line, cell, local_ns: dict = None):
11541158
profile_args = {"profile.results": args.profile_no_results,
11551159
"profile.chop": args.profile_chop,
11561160
"profile.serializer": serializer,
1157-
"profile.indexOps": args.profile_indexOps}
1161+
"profile.indexOps": args.profile_indexOps,
1162+
"profile.debug": args.profile_debug}
11581163
try:
11591164
profile_misc_args_dict = json.loads(args.profile_misc_args)
11601165
profile_args.update(profile_misc_args_dict)

src/graph_notebook/magics/metadata.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -204,8 +204,11 @@ def build_gremlin_metadata_from_query(query_type: str, results: any, res: Respon
204204
if query_type == 'explain':
205205
gremlin_metadata = create_propertygraph_metadata_obj('explain')
206206
gremlin_metadata.set_request_metrics(res)
207-
gremlin_metadata.set_metric_value('predicates', int((re.search(r'# of predicates: (.*?)\n', results).group(1))
208-
.replace(".", '').replace(",", '')))
207+
try:
208+
gremlin_metadata.set_metric_value('predicates', int((re.search(r'# of predicates: (.*?)\n', results).group(1))
209+
.replace(".", '').replace(",", '')))
210+
except AttributeError:
211+
pass
209212
return gremlin_metadata
210213
elif query_type == 'profile':
211214
gremlin_metadata = create_propertygraph_metadata_obj('profile')

src/graph_notebook/neptune/client.py

+33-9
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,7 @@
137137

138138
SPARQL_EXPLAIN_MODES = ['dynamic', 'static', 'details']
139139
OPENCYPHER_EXPLAIN_MODES = ['dynamic', 'static', 'details']
140+
GREMLIN_EXPLAIN_MODES = ['dynamic', 'static', 'details']
140141
OPENCYPHER_PLAN_CACHE_MODES = ['auto', 'enabled', 'disabled']
141142
OPENCYPHER_DEFAULT_TIMEOUT = 120000
142143
OPENCYPHER_STATUS_STATE_MODES = ['ALL', 'RUNNING', 'WAITING', 'CANCELLING']
@@ -257,7 +258,7 @@ def is_neptune_domain(self):
257258
def is_analytics_domain(self):
258259
return self.service == NEPTUNE_ANALYTICS_SERVICE_NAME
259260

260-
def get_uri_with_port(self, use_websocket=False, use_proxy=False):
261+
def get_uri(self, use_websocket=False, use_proxy=False, include_port=True):
261262
if use_websocket is True:
262263
protocol = self._ws_protocol
263264
else:
@@ -270,7 +271,9 @@ def get_uri_with_port(self, use_websocket=False, use_proxy=False):
270271
uri_host = self.target_host
271272
uri_port = self.target_port
272273

273-
uri = f'{protocol}://{uri_host}:{uri_port}'
274+
uri = f'{protocol}://{uri_host}'
275+
if include_port:
276+
uri += f':{uri_port}'
274277
return uri
275278

276279
def get_graph_id(self):
@@ -347,9 +350,9 @@ def sparql_cancel(self, query_id: str, silent: bool = False):
347350
def get_gremlin_connection(self, transport_kwargs) -> client.Client:
348351
nest_asyncio.apply()
349352

350-
ws_url = f'{self.get_uri_with_port(use_websocket=True, use_proxy=False)}/gremlin'
353+
ws_url = f'{self.get_uri(use_websocket=True, use_proxy=False)}/gremlin'
351354
if self.proxy_host != '':
352-
proxy_http_url = f'{self.get_uri_with_port(use_websocket=False, use_proxy=True)}/gremlin'
355+
proxy_http_url = f'{self.get_uri(use_websocket=False, use_proxy=True)}/gremlin'
353356
transport_factory_args = lambda: AiohttpTransport(call_from_event_loop=True, proxy=proxy_http_url,
354357
**transport_kwargs)
355358
request = self._prepare_request('GET', proxy_http_url)
@@ -387,9 +390,17 @@ def gremlin_http_query(self, query, headers=None) -> requests.Response:
387390
if headers is None:
388391
headers = {}
389392

393+
data = {}
390394
use_proxy = True if self.proxy_host != '' else False
391-
uri = f'{self.get_uri_with_port(use_websocket=False, use_proxy=use_proxy)}/gremlin'
392-
data = {'gremlin': query}
395+
if self.is_analytics_domain():
396+
uri = f'{self.get_uri(use_websocket=False, use_proxy=use_proxy, include_port=False)}/queries'
397+
data['language'] = 'gremlin'
398+
data['gremlin'] = query
399+
headers['content-type'] = 'application/json'
400+
else:
401+
uri = f'{self.get_uri(use_websocket=False, use_proxy=use_proxy)}/gremlin'
402+
data['gremlin'] = query
403+
393404
req = self._prepare_request('POST', uri, data=json.dumps(data), headers=headers)
394405
res = self._http_session.send(req, verify=self.ssl_verify)
395406
return res
@@ -412,12 +423,25 @@ def gremlin_profile(self, query: str, args={}) -> requests.Response:
412423
return self._gremlin_query_plan(query=query, plan_type='profile', args=args)
413424

414425
def _gremlin_query_plan(self, query: str, plan_type: str, args: dict, ) -> requests.Response:
415-
url = f'{self._http_protocol}://{self.host}:{self.port}/gremlin/{plan_type}'
416-
data = {'gremlin': query}
426+
data = {}
427+
headers = {}
428+
url = f'{self._http_protocol}://{self.host}'
429+
if self.is_analytics_domain():
430+
url += '/queries'
431+
data['gremlin'] = query
432+
data['language'] = 'gremlin'
433+
headers['content-type'] = 'application/json'
434+
if plan_type == 'explain':
435+
data['explain.mode'] = args.pop('explain.mode')
436+
elif plan_type == 'profile':
437+
data['profile.debug'] = args.pop('profile.debug')
438+
else:
439+
url += f':{self.port}/gremlin/{plan_type}'
440+
data['gremlin'] = query
417441
if args:
418442
for param, value in args.items():
419443
data[param] = value
420-
req = self._prepare_request('POST', url, data=json.dumps(data))
444+
req = self._prepare_request('POST', url, data=json.dumps(data), headers=headers)
421445
res = self._http_session.send(req, verify=self.ssl_verify)
422446
return res
423447

0 commit comments

Comments
 (0)