@@ -21,6 +21,8 @@ namespace
21
21
// Scatters the contiguous low-order bits of v into the bit positions that are set in mask m
// by forwarding to the BMI2 PDEP intrinsic. The '(' must immediately follow the macro name:
// with whitespace in between, the preprocessor defines an object-like macro instead and
// every call site fails to compile.
#define deposit_bits(v,m) _pdep_u32(v,m)
22
22
// Gathers the bits of v selected by mask m into the contiguous low-order bits of the result
// by forwarding to the BMI2 PEXT intrinsic. The '(' must immediately follow the macro name:
// with whitespace in between, the preprocessor defines an object-like macro instead and
// every call site fails to compile.
#define extract_bits(v,m) _pext_u32(v,m)
23
23
#else
24
+ // For ARM64 we could use SVE2-BITPERM intrinsics BDEP/BEXT if supported by the platform/compiler.
25
+
24
26
// N3864 - A constexpr bitwise operations library for C++
25
27
// https://github.com/fmatthew5876/stdcxx-bitops
26
28
uint32_t deposit_bits (uint32_t val, int mask) noexcept
@@ -295,8 +297,64 @@ namespace
295
297
if (!dptr)
296
298
return E_POINTER;
297
299
298
- // TODO: linear to swizzle x,y,z
299
- return E_NOTIMPL;
300
+ if (srcImages[0 ].rowPitch > UINT32_MAX
301
+ || srcImages[0 ].slicePitch > UINT32_MAX)
302
+ return HRESULT_E_ARITHMETIC_OVERFLOW;
303
+
304
+ const size_t height = isCompressed ? (srcImages[0 ].height + 3 ) / 4 : srcImages[0 ].height ;
305
+ const size_t width = isCompressed ? (srcImages[0 ].width + 3 ) / 4 : srcImages[0 ].width ;
306
+
307
+ const size_t maxOffset = height * width * depth * bytesPerPixel;
308
+ const size_t tail = destImage.slicePitch * depth;
309
+ if (maxOffset > tail)
310
+ return E_UNEXPECTED;
311
+
312
+ for (size_t z = 0 ; z < depth; ++z)
313
+ {
314
+ const uint8_t * sptr = srcImages[z].pixels ;
315
+ if (!sptr)
316
+ return E_POINTER;
317
+
318
+ const size_t rowPitch = srcImages[z].rowPitch ;
319
+ const uint8_t * endPtr = sptr + srcImages[z].slicePitch ;
320
+ for (size_t y = 0 ; y < height; ++y)
321
+ {
322
+ if (sptr >= endPtr)
323
+ return E_FAIL;
324
+
325
+ const uint8_t * sourcePixelPointer = sptr;
326
+ for (size_t x = 0 ; x < width; ++x)
327
+ {
328
+ const uint32_t swizzleIndex = deposit_bits (static_cast <uint32_t >(x), xBytesMask)
329
+ + deposit_bits (static_cast <uint32_t >(y), yBytesMask)
330
+ + deposit_bits (static_cast <uint32_t >(z), zBytesMask);
331
+ const size_t swizzleOffset = swizzleIndex * bytesPerPixel;
332
+ if (swizzleOffset >= maxOffset)
333
+ return E_UNEXPECTED;
334
+
335
+ uint8_t * destPixelPointer = dptr + swizzleOffset;
336
+ memcpy (destPixelPointer, sourcePixelPointer, bytesPerPixel);
337
+
338
+ sourcePixelPointer += bytesPerPixel;
339
+ }
340
+
341
+ sptr += rowPitch;
342
+ }
343
+ }
344
+
345
+ if (tail > maxOffset)
346
+ {
347
+ if (isCompressed)
348
+ {
349
+ // TODO: Pad with copy of last block
350
+ }
351
+ else
352
+ {
353
+ // TODO: zero out tail space
354
+ }
355
+ }
356
+
357
+ return S_OK;
300
358
}
301
359
302
360
// ---------------------------------------------------------------------------------
@@ -473,6 +531,7 @@ HRESULT DirectX::StandardSwizzle(
473
531
474
532
for (size_t slice = 0 ; slice < depth; ++slice, ++index )
475
533
{
534
+ // Validate source image array.
476
535
if (index >= nimages)
477
536
{
478
537
result.Release ();
@@ -506,6 +565,21 @@ HRESULT DirectX::StandardSwizzle(
506
565
result.Release ();
507
566
return E_FAIL;
508
567
}
568
+
569
+ if (!src.rowPitch || !src.slicePitch )
570
+ {
571
+ result.Release ();
572
+ return E_FAIL;
573
+ }
574
+
575
+ assert (dst.rowPitch != 0 && dst.slicePitch != 0 );
576
+
577
+ uint64_t slicePitch = static_cast <uint64_t >(src.rowPitch ) * static_cast <uint64_t >(src.height );
578
+ if (static_cast <uint64_t >(slicePitch) > src.slicePitch )
579
+ {
580
+ result.Release ();
581
+ return E_FAIL;
582
+ }
509
583
}
510
584
511
585
if (toSwizzle)
@@ -643,70 +717,3 @@ HRESULT DirectX::StandardSwizzle(
643
717
644
718
return S_OK;
645
719
}
646
-
647
-
648
- #if 0
649
- if (toSwizzle)
650
- {
651
- // row-major to z-order curve
652
- const Image* destImages = result.GetImages();
653
- for (size_t z = 0; z < depth; z++)
654
- {
655
- size_t rowPitch = srcImages[z].rowPitch;
656
- const uint8_t* sptr = srcImages[z].pixels;
657
- if (!sptr)
658
- return E_POINTER;
659
- for (size_t y = 0; y < height; y++)
660
- {
661
- for (size_t x = 0; x < width; x++)
662
- {
663
- uint32_t swizzle3Dindex = deposit_bits(static_cast<uint32_t>(x), xBytesMask) + deposit_bits(static_cast<uint32_t>(y), yBytesMask) + deposit_bits(static_cast<uint32_t>(z), zBytesMask);
664
- uint32_t swizzle2Dindex = swizzle3Dindex % (metadata.width * metadata.height);
665
- uint32_t swizzleSlice = swizzle3Dindex / (metadata.width * metadata.height);
666
- size_t swizzleOffset = swizzle2Dindex * bytesPerPixel;
667
-
668
- size_t rowMajorOffset = y * rowPitch + x * bytesPerPixel;
669
-
670
- uint8_t* dptr = destImages[swizzleSlice].pixels;
671
- if (!dptr)
672
- return E_POINTER;
673
-
674
- const uint8_t* sourcePixelPointer = sptr + rowMajorOffset;
675
- uint8_t* destPixelPointer = dptr + swizzleOffset;
676
- memcpy(destPixelPointer, sourcePixelPointer, bytesPerPixel);
677
- }
678
- }
679
- }
680
- }
681
- else
682
- {
683
- // z-order curve to row-major
684
- const Image* destImages = result.GetImages();
685
- for (size_t z = 0; z < depth; z++)
686
- {
687
- const uint8_t* sptr = srcImages[z].pixels;
688
- if (!sptr)
689
- return E_POINTER;
690
-
691
- for (size_t swizzleIndex = 0; swizzleIndex < (width * height); swizzleIndex++)
692
- {
693
- size_t swizzleOffset = swizzleIndex * bytesPerPixel;
694
- const uint8_t* sourcePixelPointer = sptr + swizzleOffset;
695
-
696
- size_t index3D = z * width * height + swizzleIndex;
697
- uint32_t destX = extract_bits(index3D, xBytesMask);
698
- uint32_t destY = extract_bits(index3D, yBytesMask);
699
- uint32_t destZ = extract_bits(index3D, zBytesMask);
700
- size_t rowPitch = destImages[z].rowPitch;
701
- size_t rowMajorOffset = destY * rowPitch + destX * bytesPerPixel;
702
-
703
- uint8_t* dptr = destImages[destZ].pixels;
704
- if (!dptr)
705
- return E_POINTER;
706
- uint8_t* destPixelPointer = dptr + rowMajorOffset;
707
-
708
- memcpy(destPixelPointer, sourcePixelPointer, bytesPerPixel);
709
- }
710
- }
711
- }
712
- #endif
0 commit comments