GH-45788: [C++][Acero] Fix data race in aggregate node #45789

Status: Open · wants to merge 1 commit into base: main
cpp/src/arrow/acero/aggregate_internal.h (18 additions, 5 deletions)

@@ -195,6 +195,7 @@ class ScalarAggregateNode : public ExecNode, public TracedNode {

Status StartProducing() override {
NoteStartProducing(ToStringExtra(0));
local_states_.resize(plan_->query_context()->max_concurrency());
return Status::OK();
}

@@ -212,6 +213,17 @@ class ScalarAggregateNode : public ExecNode, public TracedNode {
std::string ToStringExtra(int indent) const override;

private:
struct ThreadLocalState {
// Holds the segment key values of the most recent input batch processed by a thread.
// The values are updated every time an input batch is processed by the thread
std::vector<Datum> segmenter_values;
};

ThreadLocalState* GetLocalState() {
size_t thread_index = plan_->query_context()->GetThreadIndex();
return &local_states_[thread_index];
}

Status ResetKernelStates();

Status OutputResult(bool is_last);
@@ -220,9 +232,6 @@ class ScalarAggregateNode : public ExecNode, public TracedNode {
std::unique_ptr<RowSegmenter> segmenter_;
// Field indices corresponding to the segment-keys
const std::vector<int> segment_field_ids_;
// Holds the value of segment keys of the most recent input batch
// The values are updated every time an input batch is processed
std::vector<Datum> segmenter_values_;

const std::vector<std::vector<int>> target_fieldsets_;
const std::vector<Aggregate> aggs_;
@@ -233,6 +242,9 @@ class ScalarAggregateNode : public ExecNode, public TracedNode {
std::vector<std::vector<std::unique_ptr<KernelState>>> states_;

AtomicCounter input_counter_;

std::vector<ThreadLocalState> local_states_;

/// \brief Total number of output batches produced
int total_output_batches_ = 0;
};
@@ -309,6 +321,9 @@ class GroupByNode : public ExecNode, public TracedNode {
struct ThreadLocalState {
std::unique_ptr<Grouper> grouper;
std::vector<std::unique_ptr<KernelState>> agg_states;
// Holds values of the current batch in this thread that were selected for the
// segment-keys
std::vector<Datum> segmenter_values;
};

ThreadLocalState* GetLocalState() {
@@ -330,8 +345,6 @@ class GroupByNode : public ExecNode, public TracedNode {
int output_task_group_id_;
/// \brief A segmenter for the segment-keys
std::unique_ptr<RowSegmenter> segmenter_;
/// \brief Holds values of the current batch that were selected for the segment-keys
std::vector<Datum> segmenter_values_;

const std::vector<int> key_field_ids_;
/// \brief Field indices corresponding to the segment-keys
cpp/src/arrow/acero/aggregate_node_test.cc (39 additions, 0 deletions)

@@ -213,6 +213,26 @@ TEST(GroupByNode, NoSkipNulls) {
AssertExecBatchesEqualIgnoringOrder(out_schema, {expected_batch}, out_batches.batches);
}

TEST(GroupByNode, BasicParallel) {
const int64_t num_batches = 8;

std::vector<ExecBatch> batches(num_batches, ExecBatchFromJSON({int32()}, "[[42]]"));

Declaration plan = Declaration::Sequence(
{{"exec_batch_source",
ExecBatchSourceNodeOptions(schema({field("key", int32())}), batches)},
{"aggregate", AggregateNodeOptions{/*aggregates=*/{{"hash_count_all", "count(*)"}},
/*keys=*/{"key"}}}});

ASSERT_OK_AND_ASSIGN(BatchesWithCommonSchema out_batches,
DeclarationToExecBatches(plan));

ExecBatch expected_batch = ExecBatchFromJSON(
{int32(), int64()}, "[[42, " + std::to_string(num_batches) + "]]");
AssertExecBatchesEqualIgnoringOrder(out_batches.schema, {expected_batch},
out_batches.batches);
}

TEST(ScalarAggregateNode, AnyAll) {
// GH-43768: boolean_any and boolean_all with constant input should work well
// when min_count != 0.
@@ -265,5 +285,24 @@ TEST(ScalarAggregateNode, AnyAll) {
}
}

TEST(ScalarAggregateNode, BasicParallel) {
const int64_t num_batches = 8;

std::vector<ExecBatch> batches(num_batches, ExecBatchFromJSON({int32()}, "[[42]]"));

Declaration plan = Declaration::Sequence(
{{"exec_batch_source",
ExecBatchSourceNodeOptions(schema({field("", int32())}), batches)},
{"aggregate", AggregateNodeOptions{/*aggregates=*/{{"count_all", "count(*)"}}}}});

ASSERT_OK_AND_ASSIGN(BatchesWithCommonSchema out_batches,
DeclarationToExecBatches(plan));

ExecBatch expected_batch =
ExecBatchFromJSON({int64()}, "[[" + std::to_string(num_batches) + "]]");
AssertExecBatchesEqualIgnoringOrder(out_batches.schema, {expected_batch},
out_batches.batches);
}

} // namespace acero
} // namespace arrow
cpp/src/arrow/acero/groupby_aggregate_node.cc (3 additions, 3 deletions)

@@ -312,7 +312,7 @@ Result<ExecBatch> GroupByNode::Finalize() {
segment_key_field_ids_.size());

// Segment keys come first
PlaceFields(out_data, 0, segmenter_values_);
PlaceFields(out_data, 0, state->segmenter_values);

[Review comment] @pitrou (Member), Mar 17, 2025:
So, the Finalize step only considers the segmenter values for state[0]? I'm not sure I understand why.

// Followed by keys
ARROW_ASSIGN_OR_RAISE(ExecBatch out_keys, state->grouper->GetUniques());
std::move(out_keys.values.begin(), out_keys.values.end(),
@@ -379,8 +379,8 @@ Status GroupByNode::InputReceived(ExecNode* input, ExecBatch batch) {
auto exec_batch = full_batch.Slice(segment.offset, segment.length);
auto batch = ExecSpan(exec_batch);
RETURN_NOT_OK(Consume(batch));
RETURN_NOT_OK(
ExtractSegmenterValues(&segmenter_values_, exec_batch, segment_key_field_ids_));
RETURN_NOT_OK(ExtractSegmenterValues(&GetLocalState()->segmenter_values, exec_batch,
segment_key_field_ids_));
if (!segment.is_open) RETURN_NOT_OK(OutputResult(/*is_last=*/false));
return Status::OK();
};
cpp/src/arrow/acero/scalar_aggregate_node.cc (3 additions, 3 deletions)

@@ -240,8 +240,8 @@ Status ScalarAggregateNode::InputReceived(ExecNode* input, ExecBatch batch) {
// We add segment to the current segment group aggregation
auto exec_batch = full_batch.Slice(segment.offset, segment.length);
RETURN_NOT_OK(DoConsume(ExecSpan(exec_batch), thread_index));
RETURN_NOT_OK(
ExtractSegmenterValues(&segmenter_values_, exec_batch, segment_field_ids_));
RETURN_NOT_OK(ExtractSegmenterValues(&GetLocalState()->segmenter_values, exec_batch,
segment_field_ids_));

[Review comment] Member:
OutputResult seems non-thread-safe; how can InputReceived be called from several threads at once? Am I missing something?

[Reply] Contributor Author:

> OutputResult seems non-thread safe,

True.

> how can InputReceived be called from several threads at once?

InputReceived can't be parallel if there are any segment keys [1]. So if InputReceived is parallel, the segmenter must be a NoKeysSegmenter, which only generates "open and extending" segments, and those never trigger OutputResult.

[1]

if (is_cpu_parallel && segment_keys.size() > 0) {
  return Status::NotImplemented("Segmented aggregation in a multi-threaded plan");
}
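
A minimal, self-contained C++ sketch of the invariant described above (Segment, SegmentKeylessBatch, and HandleBatch are invented names for this illustration, not Acero's classes): a keyless segmenter only ever reports open segments, and the per-segment handler flushes output only for closed segments, so the non-thread-safe output path is never reached while input batches are consumed in parallel.

#include <iostream>
#include <vector>

// Stand-in for a segment produced by the row segmenter.
struct Segment {
  bool is_open;  // a keyless (trivial) segmenter always reports open segments
};

// Stand-in for NoKeysSegmenter: every batch yields a single open segment.
std::vector<Segment> SegmentKeylessBatch() { return {Segment{/*is_open=*/true}}; }

// Mirrors the per-segment handler: only a closed segment would flush output.
void HandleBatch(const std::vector<Segment>& segments, int* output_flushes) {
  for (const auto& segment : segments) {
    // ... consume the sliced batch, update thread-local segmenter values ...
    if (!segment.is_open) {
      ++*output_flushes;  // corresponds to OutputResult(/*is_last=*/false)
    }
  }
}

int main() {
  int output_flushes = 0;
  for (int batch = 0; batch < 8; ++batch) {
    HandleBatch(SegmentKeylessBatch(), &output_flushes);
  }
  std::cout << "output flushes during input: " << output_flushes << "\n";  // prints 0
  return 0;
}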

[Review comment] Member:
Ah, so the data race occurred in the non-segmented case? It's weird that we have to change the segmenting state to thread-local to fix that :)

[Reply] @zanmato1984 (Contributor Author), Mar 19, 2025:

> Ah, so the data race occurred in the non-segmented case?

Yes. The race happens with the trivial segmenter, which essentially does nothing but still clears segmenter_values concurrently every time.

It is weird, and I may even withdraw this fix. Please wait for my answer to the other comment (it's long and I'm still writing). Thank you.
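
To make the race concrete, here is a minimal, self-contained C++ sketch (Datum, ConsumeBatchRacy, and ConsumeBatchLocal are illustrative stand-ins, not Acero's API) contrasting the old shared segmenter_values vector, which every worker thread clears and refills, with the per-thread vectors this PR introduces, indexed by a stable thread index as in GetLocalState(). ThreadSanitizer would be expected to flag the shared variant's concurrent writes, while the thread-local variant is race-free.

#include <cstddef>
#include <thread>
#include <vector>

// Toy stand-in for arrow::Datum.
struct Datum {
  int value = 0;
};

// Before the fix: a single vector shared by all worker threads. Each thread clears
// and refills it for every batch, which is the unsynchronized concurrent write that
// ThreadSanitizer reports.
std::vector<Datum> shared_segmenter_values;

void ConsumeBatchRacy(int batch_value) {
  shared_segmenter_values.clear();                   // racy write
  shared_segmenter_values.push_back({batch_value});  // racy write
}

// After the fix: one vector per thread, sized up front (as local_states_ is resized
// in StartProducing()) and indexed by a stable per-thread index.
std::vector<std::vector<Datum>> local_segmenter_values;

void ConsumeBatchLocal(std::size_t thread_index, int batch_value) {
  auto& values = local_segmenter_values[thread_index];
  values.clear();                  // touches only this thread's state
  values.push_back({batch_value});
}

int main() {
  const std::size_t num_threads = 4;
  local_segmenter_values.resize(num_threads);

  std::vector<std::thread> threads;
  for (std::size_t i = 0; i < num_threads; ++i) {
    threads.emplace_back([i] {
      ConsumeBatchRacy(static_cast<int>(i));      // data race across threads
      ConsumeBatchLocal(i, static_cast<int>(i));  // no race: disjoint state
    });
  }
  for (auto& t : threads) t.join();
  return 0;
}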


// If the segment closes the current segment group, we can output segment group
// aggregation.
@@ -292,7 +292,7 @@ Status ScalarAggregateNode::OutputResult(bool is_last) {
batch.values.resize(kernels_.size() + segment_field_ids_.size());

// First, insert segment keys
PlaceFields(batch, /*base=*/0, segmenter_values_);
PlaceFields(batch, /*base=*/0, GetLocalState()->segmenter_values);

// Followed by aggregate values
std::size_t base = segment_field_ids_.size();