Skip to content

Commit 4b9693f

Browse files
author
Martin Köhler
committed
Add CBLAS for AXPBY
1 parent c571921 commit 4b9693f

15 files changed

+332
-34
lines changed

CBLAS/include/cblas_f77.h

+5-5
Original file line numberDiff line numberDiff line change
@@ -243,7 +243,7 @@
243243
#define F77_sswap(...) F77_sswap_base(__VA_ARGS__)
244244
#define F77_scopy(...) F77_scopy_base(__VA_ARGS__)
245245
#define F77_saxpy(...) F77_saxpy_base(__VA_ARGS__)
246-
#define F77_saxpby(...) F77_saxpby_base(__VA_ARGS__)
246+
#define F77_saxpby(...) F77_saxpby_base(__VA_ARGS__)
247247
#define F77_sdot_sub(...) F77_sdot_sub_base(__VA_ARGS__)
248248
#define F77_sdsdot_sub(...) F77_sdsdot_sub_base(__VA_ARGS__)
249249
#define F77_sscal(...) F77_sscal_base(__VA_ARGS__)
@@ -610,7 +610,7 @@ void F77_srotmg_base(float *,float *,float *,const float *, float *);
610610
void F77_sswap_base(FINT, float *, FINT, float *, FINT);
611611
void F77_scopy_base(FINT, const float *, FINT, float *, FINT);
612612
void F77_saxpy_base(FINT, const float *, const float *, FINT, float *, FINT);
613-
void F77_saxpy_base(FINT, const float *, const float *, FINT, float *, float *, FINT);
613+
void F77_saxpby_base(FINT, const float *, const float *, FINT, const float *, float *, FINT);
614614
void F77_sdot_sub_base(FINT, const float *, FINT, const float *, FINT, float *);
615615
void F77_sdsdot_sub_base(FINT, const float *, const float *, FINT, const float *, FINT, float *);
616616
void F77_sscal_base(FINT, const float *, float *, FINT);
@@ -627,7 +627,7 @@ void F77_drotmg_base(double *,double *,double *,const double *, double *);
627627
void F77_dswap_base(FINT, double *, FINT, double *, FINT);
628628
void F77_dcopy_base(FINT, const double *, FINT, double *, FINT);
629629
void F77_daxpy_base(FINT, const double *, const double *, FINT, double *, FINT);
630-
void F77_daxpby_base(FINT, const double *, const double *, FINT, double *, double *, FINT);
630+
void F77_daxpby_base(FINT, const double *, const double *, FINT, const double *, double *, FINT);
631631
void F77_dswap_base(FINT, double *, FINT, double *, FINT);
632632
void F77_dsdot_sub_base(FINT, const float *, FINT, const float *, FINT, double *);
633633
void F77_ddot_sub_base(FINT, const double *, FINT, const double *, FINT, double *);
@@ -643,7 +643,7 @@ void F77_csrot_base(FINT, void *X, FINT, void *, FINT, const float *, const floa
643643
void F77_cswap_base(FINT, void *, FINT, void *, FINT);
644644
void F77_ccopy_base(FINT, const void *, FINT, void *, FINT);
645645
void F77_caxpy_base(FINT, const void *, const void *, FINT, void *, FINT);
646-
void F77_caxpby_base(FINT, const void *, const void *, FINT, void *, void *, FINT);
646+
void F77_caxpby_base(FINT, const void *, const void *, FINT, const void *, void *, FINT);
647647
void F77_cswap_base(FINT, void *, FINT, void *, FINT);
648648
void F77_cdotc_sub_base(FINT, const void *, FINT, const void *, FINT, void *);
649649
void F77_cdotu_sub_base(FINT, const void *, FINT, const void *, FINT, void *);
@@ -661,7 +661,7 @@ void F77_zdrot_base(FINT, void *X, FINT, void *, FINT, const double *, const dou
661661
void F77_zswap_base(FINT, void *, FINT, void *, FINT);
662662
void F77_zcopy_base(FINT, const void *, FINT, void *, FINT);
663663
void F77_zaxpy_base(FINT, const void *, const void *, FINT, void *, FINT);
664-
void F77_zaxpby_base(FINT, const void *, const void *, FINT, void*, void *, FINT);
664+
void F77_zaxpby_base(FINT, const void *, const void *, FINT, const void*, void *, FINT);
665665
void F77_zswap_base(FINT, void *, FINT, void *, FINT);
666666
void F77_zdotc_sub_base(FINT, const void *, FINT, const void *, FINT, void *);
667667
void F77_zdotu_sub_base(FINT, const void *, FINT, const void *, FINT, void *);

CBLAS/src/CMakeLists.txt

+4-4
Original file line numberDiff line numberDiff line change
@@ -16,29 +16,29 @@ set(SLEV1 cblas_srotg.c cblas_srotmg.c cblas_srot.c cblas_srotm.c
1616
cblas_sswap.c cblas_sscal.c cblas_scopy.c cblas_saxpy.c
1717
cblas_sdot.c cblas_sdsdot.c cblas_snrm2.c cblas_sasum.c
1818
cblas_isamax.c sdotsub.f sdsdotsub.f snrm2sub.f sasumsub.f
19-
isamaxsub.f)
19+
isamaxsub.f cblas_saxpby.c)
2020

2121
# Files for level 1 double precision real
2222
set(DLEV1 cblas_drotg.c cblas_drotmg.c cblas_drot.c cblas_drotm.c
2323
cblas_dswap.c cblas_dscal.c cblas_dcopy.c cblas_daxpy.c
2424
cblas_ddot.c cblas_dsdot.c cblas_dnrm2.c cblas_dasum.c
2525
cblas_idamax.c ddotsub.f dsdotsub.f dnrm2sub.f
26-
dasumsub.f idamaxsub.f)
26+
dasumsub.f idamaxsub.f cblas_daxpby.c)
2727

2828
# Files for level 1 single precision complex
2929
set(CLEV1 cblas_crotg.c cblas_csrot.c
3030
cblas_cswap.c cblas_cscal.c cblas_csscal.c cblas_ccopy.c
3131
cblas_caxpy.c cblas_cdotu_sub.c cblas_cdotc_sub.c
3232
cblas_icamax.c cdotcsub.f cdotusub.f icamaxsub.f
33-
cblas_scabs1.c scabs1sub.f )
33+
cblas_scabs1.c scabs1sub.f cblas_caxpby.c)
3434

3535
# Files for level 1 double precision complex
3636
set(ZLEV1 cblas_zrotg.c cblas_zdrot.c
3737
cblas_zswap.c cblas_zscal.c cblas_zdscal.c cblas_zcopy.c
3838
cblas_zaxpy.c cblas_zdotu_sub.c cblas_zdotc_sub.c cblas_dznrm2.c
3939
cblas_dzasum.c cblas_izamax.c zdotcsub.f zdotusub.f
4040
dzasumsub.f dznrm2sub.f izamaxsub.f
41-
cblas_dcabs1.c dcabs1sub.f)
41+
cblas_dcabs1.c dcabs1sub.f cblas_zaxpby.c)
4242

4343
# Common files for level 1 single precision
4444
set(SCLEV1 cblas_scasum.c scasumsub.f cblas_scnrm2.c scnrm2sub.f)

CBLAS/src/Makefile

+4-4
Original file line numberDiff line numberDiff line change
@@ -26,29 +26,29 @@ slev1 = cblas_srotg.o cblas_srotmg.o cblas_srot.o cblas_srotm.o \
2626
cblas_sswap.o cblas_sscal.o cblas_scopy.o cblas_saxpy.o \
2727
cblas_sdot.o cblas_sdsdot.o cblas_snrm2.o cblas_sasum.o \
2828
cblas_isamax.o sdotsub.o sdsdotsub.o snrm2sub.o sasumsub.o \
29-
isamaxsub.o
29+
isamaxsub.o cblas_saxpby.o
3030

3131
# Files for level 1 double precision real
3232
dlev1 = cblas_drotg.o cblas_drotmg.o cblas_drot.o cblas_drotm.o \
3333
cblas_dswap.o cblas_dscal.o cblas_dcopy.o cblas_daxpy.o \
3434
cblas_ddot.o cblas_dsdot.o cblas_dnrm2.o cblas_dasum.o \
3535
cblas_idamax.o ddotsub.o dsdotsub.o dnrm2sub.o \
36-
dasumsub.o idamaxsub.o
36+
dasumsub.o idamaxsub.o cblas_daxpby.o
3737

3838
# Files for level 1 single precision complex
3939
clev1 = cblas_crotg.o cblas_csrot.o \
4040
cblas_cswap.o cblas_cscal.o cblas_csscal.o cblas_ccopy.o \
4141
cblas_caxpy.o cblas_cdotu_sub.o cblas_cdotc_sub.o \
4242
cblas_icamax.o cdotcsub.o cdotusub.o icamaxsub.o \
43-
cblas_scabs1.o scabs1sub.o
43+
cblas_scabs1.o scabs1sub.o cblas_caxpby.o
4444

4545
# Files for level 1 double precision complex
4646
zlev1 = cblas_zrotg.o cblas_zdrot.o \
4747
cblas_zswap.o cblas_zscal.o cblas_zdscal.o cblas_zcopy.o \
4848
cblas_zaxpy.o cblas_zdotu_sub.o cblas_zdotc_sub.o cblas_dznrm2.o \
4949
cblas_dzasum.o cblas_izamax.o zdotcsub.o zdotusub.o \
5050
dzasumsub.o dznrm2sub.o izamaxsub.o \
51-
cblas_dcabs1.o dcabs1sub.o
51+
cblas_dcabs1.o dcabs1sub.o cblas_zaxpby.o
5252

5353
# Common files for level 1 single precision
5454
sclev1 = cblas_scasum.o scasumsub.o cblas_scnrm2.o scnrm2sub.o

CBLAS/src/cblas_caxpby.c

+22
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
/*
2+
* cblas_caxpby.c
3+
*
4+
* The program is a C interface to caxpby.
5+
*
6+
* Written by Martin Koehler, 08/26/2024
7+
*
8+
*/
9+
#include "cblas.h"
10+
#include "cblas_f77.h"
11+
/*
 * C entry point that forwards its arguments to F77_caxpby.
 *
 * N, incX and incY arrive by value and are handed on by address; alpha, X,
 * beta and Y are already pointers and are passed through unchanged.
 * NOTE(review): presumably the underlying routine computes
 * Y := alpha*X + beta*Y on single precision complex data -- confirm against
 * the Fortran CAXPBY implementation, which is not visible here.
 */
void API_SUFFIX(cblas_caxpby)( const CBLAS_INT N, const void *alpha, const void *X,
12+
const CBLAS_INT incX, const void *beta, void *Y, const CBLAS_INT incY)
13+
{
14+
/* With F77_INT defined, the integer arguments are copied into F77_INT locals
   and the addresses of those locals are what gets passed on. */
#ifdef F77_INT
15+
F77_INT F77_N=N, F77_incX=incX, F77_incY=incY;
16+
#else
17+
/* Otherwise the F77_* names are plain aliases for the parameters. These
   macros are not #undef'd afterwards, so they stay defined for the rest of
   the translation unit. */
#define F77_N N
18+
#define F77_incX incX
19+
#define F77_incY incY
20+
#endif
21+
/* Every argument is passed by address. */
F77_caxpby( &F77_N, alpha, X, &F77_incX, beta, Y, &F77_incY);
22+
}

CBLAS/src/cblas_daxpby.c

+22
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
/*
2+
* cblas_daxpby.c
3+
*
4+
* The program is a C interface to daxpby.
5+
*
6+
* Written by Martin Koehler, 08/26/2024
7+
*
8+
*/
9+
#include "cblas.h"
10+
#include "cblas_f77.h"
11+
/*
 * C entry point that forwards its arguments to F77_daxpby.
 *
 * The scalars alpha and beta and the integers N, incX and incY arrive by
 * value and are handed on by address; X and Y are passed through unchanged.
 * NOTE(review): presumably the underlying routine computes
 * Y := alpha*X + beta*Y in double precision -- confirm against the Fortran
 * DAXPBY implementation, which is not visible here.
 */
void API_SUFFIX(cblas_daxpby)( const CBLAS_INT N, const double alpha, const double *X,
12+
const CBLAS_INT incX, const double beta, double *Y, const CBLAS_INT incY)
13+
{
14+
/* With F77_INT defined, the integer arguments are copied into F77_INT locals
   and the addresses of those locals are what gets passed on. */
#ifdef F77_INT
15+
F77_INT F77_N=N, F77_incX=incX, F77_incY=incY;
16+
#else
17+
/* Otherwise the F77_* names are plain aliases for the parameters. These
   macros are not #undef'd afterwards, so they stay defined for the rest of
   the translation unit. */
#define F77_N N
18+
#define F77_incX incX
19+
#define F77_incY incY
20+
#endif
21+
/* alpha and beta are by-value parameters, so their addresses are taken here. */
F77_daxpby( &F77_N, &alpha, X, &F77_incX, &beta, Y, &F77_incY);
22+
}

CBLAS/src/cblas_saxpby.c

+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
/*
2+
* cblas_saxpby.c
3+
*
4+
* The program is a C interface to saxpby.
5+
* It calls the Fortran wrapper before calling saxpby.
6+
*
7+
* Written by Martin Koehler, 08/24/2024
8+
*
9+
*/
10+
#include "cblas.h"
11+
#include "cblas_f77.h"
12+
/*
 * C entry point that forwards its arguments to F77_saxpby.
 *
 * The scalars alpha and beta and the integers N, incX and incY arrive by
 * value and are handed on by address; X and Y are passed through unchanged.
 * NOTE(review): presumably the underlying routine computes
 * Y := alpha*X + beta*Y in single precision -- confirm against the Fortran
 * SAXPBY implementation, which is not visible here.
 */
void API_SUFFIX(cblas_saxpby)( const CBLAS_INT N, const float alpha, const float *X,
13+
const CBLAS_INT incX, const float beta, float *Y, const CBLAS_INT incY)
14+
{
15+
/* With F77_INT defined, the integer arguments are copied into F77_INT locals
   and the addresses of those locals are what gets passed on. */
#ifdef F77_INT
16+
F77_INT F77_N=N, F77_incX=incX, F77_incY=incY;
17+
#else
18+
/* Otherwise the F77_* names are plain aliases for the parameters. These
   macros are not #undef'd afterwards, so they stay defined for the rest of
   the translation unit. */
#define F77_N N
19+
#define F77_incX incX
20+
#define F77_incY incY
21+
#endif
22+
/* alpha and beta are by-value parameters, so their addresses are taken here. */
F77_saxpby( &F77_N, &alpha, X, &F77_incX, &beta, Y, &F77_incY);
23+
}

CBLAS/src/cblas_zaxpby.c

+22
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
/*
2+
* cblas_zaxpby.c
3+
*
4+
* The program is a C interface to zaxpby.
5+
*
6+
* Written by Martin Koehler, 08/26/2024
7+
*
8+
*/
9+
#include "cblas.h"
10+
#include "cblas_f77.h"
11+
/*
 * C entry point that forwards its arguments to F77_zaxpby.
 *
 * N, incX and incY arrive by value and are handed on by address; alpha, X,
 * beta and Y are already pointers and are passed through unchanged.
 * NOTE(review): presumably the underlying routine computes
 * Y := alpha*X + beta*Y on double precision complex data -- confirm against
 * the Fortran ZAXPBY implementation, which is not visible here.
 */
void API_SUFFIX(cblas_zaxpby)( const CBLAS_INT N, const void *alpha, const void *X,
12+
const CBLAS_INT incX, const void *beta, void *Y, const CBLAS_INT incY)
13+
{
14+
/* With F77_INT defined, the integer arguments are copied into F77_INT locals
   and the addresses of those locals are what gets passed on. */
#ifdef F77_INT
15+
F77_INT F77_N=N, F77_incX=incX, F77_incY=incY;
16+
#else
17+
/* Otherwise the F77_* names are plain aliases for the parameters. These
   macros are not #undef'd afterwards, so they stay defined for the rest of
   the translation unit. */
#define F77_N N
18+
#define F77_incX incX
19+
#define F77_incY incY
20+
#endif
21+
/* Every argument is passed by address. */
F77_zaxpby( &F77_N, alpha, X, &F77_incX, beta, Y, &F77_incY);
22+
}

CBLAS/testing/c_cblas1.c

+8
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,14 @@ void F77_caxpy(const CBLAS_INT *N, const void *alpha, void *X,
1515
return;
1616
}
1717

18+
/*
 * Test shim: takes every argument by pointer, dereferences N, incX and incY,
 * and calls cblas_caxpby with the resulting values. alpha, X, beta and Y are
 * forwarded as the pointers they already are.
 * NOTE(review): presumably this is the symbol the Fortran test driver
 * reaches as CAXPBYTEST via a name-mapping macro -- confirm in the test
 * headers, which are not visible here.
 * NOTE(review): X is declared non-const here although cblas_caxpby takes it
 * as const void *; this matches the neighbouring F77_caxpy shim.
 */
void F77_caxpby(const CBLAS_INT *N, const void *alpha, void *X,
19+
const CBLAS_INT *incX, const void *beta, void *Y, const CBLAS_INT *incY)
20+
{
21+
cblas_caxpby(*N, alpha, X, *incX, beta, Y, *incY);
22+
return;
23+
}
24+
25+
1826
void F77_ccopy(const CBLAS_INT *N, void *X, const CBLAS_INT *incX,
1927
void *Y, const CBLAS_INT *incY)
2028
{

CBLAS/testing/c_cblat1.f

+63-6
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ PROGRAM CCBLAT1
1919
DATA SFAC/9.765625E-4/
2020
* .. Executable Statements ..
2121
WRITE (NOUT,99999)
22-
DO 20 IC = 1, 10
22+
DO 20 IC = 1, 11
2323
ICASE = IC
2424
CALL HEADER
2525
*
@@ -32,7 +32,7 @@ PROGRAM CCBLAT1
3232
INCX = 9999
3333
INCY = 9999
3434
MODE = 9999
35-
IF (ICASE.LE.5) THEN
35+
IF (ICASE.LE.5 .OR. ICASE.EQ.11) THEN
3636
CALL CHECK2(SFAC)
3737
ELSE IF (ICASE.GE.6) THEN
3838
CALL CHECK1(SFAC)
@@ -53,7 +53,7 @@ SUBROUTINE HEADER
5353
INTEGER ICASE, INCX, INCY, MODE, N
5454
LOGICAL PASS
5555
* .. Local Arrays ..
56-
CHARACTER*15 L(10)
56+
CHARACTER*15 L(11)
5757
* .. Common blocks ..
5858
COMMON /COMBLA/ICASE, N, INCX, INCY, MODE, PASS
5959
* .. Data statements ..
@@ -67,6 +67,8 @@ SUBROUTINE HEADER
6767
DATA L(8)/'CBLAS_CSCAL'/
6868
DATA L(9)/'CBLAS_CSSCAL'/
6969
DATA L(10)/'CBLAS_ICAMAX'/
70+
DATA L(11)/'CBLAS_CAXPBY'/
71+
7072
* .. Executable Statements ..
7173
WRITE (NOUT,99999) ICASE, L(ICASE)
7274
RETURN
@@ -284,23 +286,26 @@ SUBROUTINE CHECK2(SFAC)
284286
INTEGER ICASE, INCX, INCY, MODE, N
285287
LOGICAL PASS
286288
* .. Local Scalars ..
287-
COMPLEX CA,CTEMP
289+
COMPLEX CA,CB,CTEMP
288290
INTEGER I, J, KI, KN, KSIZE, LENX, LENY, MX, MY
289291
* .. Local Arrays ..
290292
COMPLEX CDOT(1), CSIZE1(4), CSIZE2(7,2), CSIZE3(14),
291293
+ CT10X(7,4,4), CT10Y(7,4,4), CT6(4,4), CT7(4,4),
292-
+ CT8(7,4,4), CX(7), CX1(7), CY(7), CY1(7)
294+
+ CT8(7,4,4), CX(7), CX1(7), CY(7), CY1(7),
295+
+ CT11(7,4,4)
293296
INTEGER INCXS(4), INCYS(4), LENS(4,2), NS(4)
294297
* .. External Functions ..
295298
EXTERNAL CDOTCTEST, CDOTUTEST
296299
* .. External Subroutines ..
297-
EXTERNAL CAXPYTEST, CCOPYTEST, CSWAPTEST, CTEST
300+
EXTERNAL CAXPYTEST, CCOPYTEST, CSWAPTEST, CTEST,
301+
+ CAXPBYTEST
298302
* .. Intrinsic Functions ..
299303
INTRINSIC ABS, MIN
300304
* .. Common blocks ..
301305
COMMON /COMBLA/ICASE, N, INCX, INCY, MODE, PASS
302306
* .. Data statements ..
303307
DATA CA/(0.4E0,-0.7E0)/
308+
DATA CB/(0.7E0,-0.4E0)/
304309
DATA INCXS/1, 2, -2, -1/
305310
DATA INCYS/1, -2, 1, -2/
306311
DATA LENS/1, 1, 2, 4, 1, 1, 3, 7/
@@ -470,6 +475,54 @@ SUBROUTINE CHECK2(SFAC)
470475
+ (1.54E0,1.54E0), (1.54E0,1.54E0),
471476
+ (1.54E0,1.54E0), (1.54E0,1.54E0),
472477
+ (1.54E0,1.54E0), (1.54E0,1.54E0)/
478+
479+
DATA ((CT11(I,J,1),I=1,7),J=1,4)/(0.6E0,-0.6E0),
480+
+ (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0),
481+
+ (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0),
482+
+ (-0.1E0,-1.47E0), (0.0E0,0.0E0), (0.0E0,0.0E0),
483+
+ (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0),
484+
+ (0.0E0,0.0E0), (-0.1E0,-1.47E0),
485+
+ (-1.08E0,0.71E0), (0.0E0,0.0E0), (0.0E0,0.0E0),
486+
+ (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0),
487+
+ (-0.1E0,-1.47E0), (-1.08E0,0.71E0),
488+
+ (-0.42E0,-0.99E0), (-0.61E0,-0.85E0),
489+
+ (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0)/
490+
DATA ((CT11(I,J,2),I=1,7),J=1,4)/(0.6E0,-0.6E0),
491+
+ (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0),
492+
+ (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0),
493+
+ (-0.1E0,-1.47E0), (0.0E0,0.0E0), (0.0E0,0.0E0),
494+
+ (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0),
495+
+ (0.0E0,0.0E0), (-0.49E0,-0.95E0),
496+
+ (-0.9E0,0.5E0),(-0.03E0,-1.51E0), (0.0E0,0.0E0),
497+
+ (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0),
498+
+ (0.36E0,0.00E0), (-0.9E0,0.5E0),
499+
+ (-0.39E0,-0.23E0), (0.1E0,-0.5E0),
500+
+ (-0.82E0,-0.39E0), (-0.5E0,-0.3E0),
501+
+ (0.0E0,-1.62E0)/
502+
DATA ((CT11(I,J,3),I=1,7),J=1,4)/(0.6E0,-0.6E0),
503+
+ (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0),
504+
+ (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0),
505+
+ (-0.1E0,-1.47E0), (0.0E0,0.0E0), (0.0E0,0.0E0),
506+
+ (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0),
507+
+ (0.0E0,0.0E0), (-0.49E0,-0.95E0),
508+
+ (-0.71E0,-0.1E0), (0.0E0,0.0E0), (0.0E0,0.0E0),
509+
+ (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0),
510+
+ (0.36E0,0.00E0), (-1.07E0,1.18E0),
511+
+ (-0.42E0,-0.99E0), (-0.41E0,-1.2E0),
512+
+ (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0)/
513+
DATA ((CT11(I,J,4),I=1,7),J=1,4)/(0.6E0,-0.6E0),
514+
+ (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0),
515+
+ (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0),
516+
+ (-0.1E0,-1.47E0), (0.0E0,0.0E0), (0.0E0,0.0E0),
517+
+ (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0),
518+
+ (0.0E0,0.0E0), (-0.1E0,-1.47E0), (-0.9E0,0.5E0),
519+
+ (-0.4E0,-0.7E0), (0.0E0,0.0E0), (0.0E0,0.0E0),
520+
+ (0.0E0,0.0E0), (0.0E0,0.0E0), (-0.1E0,-1.47E0),
521+
+ (-0.9E0,0.5E0),(-0.4E0,-0.7E0), (0.1E0,-0.5E0),
522+
+ (-0.82E0,-0.39E0), (-0.5E0,-0.3E0),
523+
+ (-0.2E0,-1.27E0)/
524+
525+
473526
* .. Executable Statements ..
474527
DO 60 KI = 1, 4
475528
INCX = INCXS(KI)
@@ -510,6 +563,10 @@ SUBROUTINE CHECK2(SFAC)
510563
CALL CSWAPTEST(N,CX,INCX,CY,INCY)
511564
CALL CTEST(LENX,CX,CT10X(1,KN,KI),CSIZE3,1.0E0)
512565
CALL CTEST(LENY,CY,CT10Y(1,KN,KI),CSIZE3,1.0E0)
566+
ELSE IF (ICASE.EQ.11) THEN
567+
* .. CAXPBYTEST ..
568+
CALL CAXPBYTEST(N,CA,CX,INCX,CB,CY,INCY)
569+
CALL CTEST(LENY,CY,CT11(1,KN,KI),CSIZE2(1,KSIZE),SFAC)
513570
ELSE
514571
WRITE (NOUT,*) ' Shouldn''t be here in CHECK2'
515572
STOP

CBLAS/testing/c_dblas1.c

+8
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,14 @@ void F77_daxpy(const CBLAS_INT *N, const double *alpha, const double *X,
2020
return;
2121
}
2222

23+
/*
 * Test shim: takes every argument by pointer, dereferences N, alpha, incX,
 * beta and incY, and calls cblas_daxpby with the resulting values. X and Y
 * are forwarded as the pointers they already are.
 * NOTE(review): presumably this is the symbol the Fortran test driver
 * reaches through a name-mapping macro -- confirm in the test headers,
 * which are not visible here.
 */
void F77_daxpby(const CBLAS_INT *N, const double *alpha, const double *X,
24+
const CBLAS_INT *incX, const double *beta, double *Y, const CBLAS_INT *incY)
25+
{
26+
cblas_daxpby(*N, *alpha, X, *incX, *beta, Y, *incY);
27+
return;
28+
}
29+
30+
2331
void F77_dcopy(const CBLAS_INT *N, double *X, const CBLAS_INT *incX,
2432
double *Y, const CBLAS_INT *incY)
2533
{

0 commit comments

Comments
 (0)