Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
39 changes: 28 additions & 11 deletions BLAS/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,18 @@
# Compilers and flags
FC = gfortran
CC = gcc

# Ensure .mod files are written to (and read from) build/.
# Defaults: gfortran -> -J, ifort/ifx -> -module. You can still override MODFLAG on the make command line.
# MODDIR is recursively expanded (=), so $(BUILD_DIR) is resolved when the flags are
# used in a recipe, not when this line is parsed.
MODDIR = $(BUILD_DIR)
# Both Intel drivers take the same flag, so one test covers "ifort" and "ifx".
ifneq ($(findstring ifort,$(FC))$(findstring ifx,$(FC)),)
MODFLAG ?= -module $(MODDIR)
else
MODFLAG ?= -J$(MODDIR)
endif

# FFLAGS carries the module output flag; FFLAGS_F77 only adds $(MODDIR) to the search path.
# Each variable is assigned exactly once (an earlier assignment would be overridden anyway).
FFLAGS = -O2 -fPIC -ffree-line-length-none -Wuninitialized -Wmaybe-uninitialized -Iinclude -I$(MODDIR) $(MODFLAG)
FFLAGS_F77 = -O2 -fPIC -ffixed-line-length-none -Wuninitialized -Wmaybe-uninitialized -Iinclude -I$(MODDIR)
CFLAGS = -O2 -fPIC

# Directory structure
Expand Down Expand Up @@ -63,7 +73,8 @@ else
BLAS_LIB ?= -lrefblas
endif

# Optional: DIFFSIZES_access when using ISIZE globals (.f or .f90+wrappers when many vars)
# Optional: DIFFSIZES_access when using ISIZE globals (run_tapenade_blas.py writes .f or .f90+wrappers)
# When many ISIZE vars exceed F77 COMMON line limit, generator writes DIFFSIZES_access.f90 + wrappers instead of .f
# Prefer .f90 when present (may have more vars than stale .f)
# Must be defined before any rule that uses it as a prerequisite, so "make forward" (etc.) builds it first.
ifneq ($(wildcard $(SRC_DIR)/DIFFSIZES_access.f90),)
Expand Down Expand Up @@ -169,17 +180,22 @@ $(BUILD_DIR)/%_dep2.o: $(SRC_DIR)/%_dep2.f
$(FC) $(FFLAGS_F77) -c $< -o $@

# DIFFSIZES_access - F77 .f or F90 .f90 (generator picks based on COMMON line length)
# When .f90 exists: one compile produces both DIFFSIZES_access.o and diffsizes_access.mod;
# wrappers depend on the .mod explicitly (avoids stale .o from .f).
# The module output directory comes from $(MODFLAG) inside $(FFLAGS), so no extra -J is passed.
$(BUILD_DIR)/diffsizes_access.mod: $(SRC_DIR)/DIFFSIZES_access.f90
	@mkdir -p $(@D)
	$(FC) $(FFLAGS) -c $< -o $(BUILD_DIR)/DIFFSIZES_access.o
	@# gfortran leaves an unchanged .mod untouched; refresh its timestamp so this
	@# target does not stay older than its source and rebuild on every run.
	@if [ -f $@ ]; then touch $@; fi

# Two ways to obtain DIFFSIZES_access.o, chosen at parse time:
#  - DIFFSIZES_access.f90 present: the object is a by-product of building
#    diffsizes_access.mod, so it only depends on that .mod (the .f is not compiled).
#  - otherwise: compile the fixed-form DIFFSIZES_access.f directly.
ifneq ($(wildcard $(SRC_DIR)/DIFFSIZES_access.f90),)
$(BUILD_DIR)/DIFFSIZES_access.o: $(BUILD_DIR)/diffsizes_access.mod
else
$(BUILD_DIR)/DIFFSIZES_access.o: $(SRC_DIR)/DIFFSIZES_access.f
	$(FC) $(FFLAGS_F77) -c $< -o $@
endif

# DIFFSIZES_access_wrappers.f - external symbols for F90 module (set_*, get_*, check_*)
# Depend on .mod so we always build from .f90 when using F90 path (avoids stale .o from .f)
# Compiled once; the module directory is already supplied by $(MODFLAG) inside $(FFLAGS).
# $< is the wrappers source because it is listed first among the prerequisites.
$(BUILD_DIR)/DIFFSIZES_access_wrappers.o: $(SRC_DIR)/DIFFSIZES_access_wrappers.f $(BUILD_DIR)/diffsizes_access.mod
	$(FC) $(FFLAGS) -c $< -o $@

# DIFFSIZES handling (supports both Fortran 90 module and Fortran 77 include)
# For F90: DIFFSIZES.f90 is compiled to produce DIFFSIZES.o and DIFFSIZES.mod
Expand Down Expand Up @@ -292,31 +308,31 @@ $(BUILD_DIR)/libdiffblas_d.a: compile-d $(DIFFSIZES_ACCESS_OBJ)
@echo "Created libdiffblas_d.a with $$(ls $(BUILD_DIR)/*_d.o 2>/dev/null | wc -w) objects"

# Shared library of all forward-mode (*_d.o) objects.
# The link only runs when at least one object exists; otherwise an empty
# placeholder file is touched so the target still exists.
$(BUILD_DIR)/libdiffblas_d.so: compile-d
	@objs="$$(ls $(BUILD_DIR)/*_d.o 2>/dev/null)"; \
	if [ -n "$$objs" ]; then \
	  $(FC) -shared -o $@ $$objs; \
	else \
	  touch $@; \
	fi

# Single library for all reverse mode differentiated code
# Static archive: every *_b.o plus adStack.o and the DIFFSIZES_access object(s).
$(BUILD_DIR)/libdiffblas_b.a: compile-b $(DIFFSIZES_ACCESS_OBJ)
	@ar rcs $@ $$(ls $(BUILD_DIR)/*_b.o 2>/dev/null) $(BUILD_DIR)/adStack.o $(DIFFSIZES_ACCESS_OBJ)
	@echo "Created libdiffblas_b.a with $$(ls $(BUILD_DIR)/*_b.o 2>/dev/null | wc -w) objects"

# Shared variant: linked only when at least one *_b.o exists; otherwise an
# empty placeholder file is touched so the target still exists.
$(BUILD_DIR)/libdiffblas_b.so: compile-b $(DIFFSIZES_ACCESS_OBJ)
	@objs="$$(ls $(BUILD_DIR)/*_b.o 2>/dev/null)"; \
	if [ -n "$$objs" ]; then \
	  $(FC) -shared -o $@ $$objs $(BUILD_DIR)/adStack.o $(DIFFSIZES_ACCESS_OBJ); \
	else \
	  touch $@; \
	fi

# Single library for all vector forward mode differentiated code
# Static archive: every *_dv.o plus DIFFSIZES.o and the DIFFSIZES_access object(s).
$(BUILD_DIR)/libdiffblas_dv.a: compile-dv $(DIFFSIZES_ACCESS_OBJ)
	@ar rcs $@ $$(ls $(BUILD_DIR)/*_dv.o 2>/dev/null) $(BUILD_DIR)/DIFFSIZES.o $(DIFFSIZES_ACCESS_OBJ)
	@echo "Created libdiffblas_dv.a with $$(ls $(BUILD_DIR)/*_dv.o 2>/dev/null | wc -w) objects"

# Shared variant: linked only when at least one *_dv.o exists; otherwise an
# empty placeholder file is touched so the target still exists.
$(BUILD_DIR)/libdiffblas_dv.so: compile-dv
	@objs="$$(ls $(BUILD_DIR)/*_dv.o 2>/dev/null)"; \
	if [ -n "$$objs" ]; then \
	  $(FC) -shared -o $@ $$objs $(BUILD_DIR)/DIFFSIZES.o; \
	else \
	  touch $@; \
	fi

# Single library for all vector reverse mode differentiated code
# Static archive: every *_bv.o plus adStack.o, DIFFSIZES.o and the DIFFSIZES_access object(s).
$(BUILD_DIR)/libdiffblas_bv.a: compile-bv $(DIFFSIZES_ACCESS_OBJ)
	@ar rcs $@ $$(ls $(BUILD_DIR)/*_bv.o 2>/dev/null) $(BUILD_DIR)/adStack.o $(BUILD_DIR)/DIFFSIZES.o $(DIFFSIZES_ACCESS_OBJ)
	@echo "Created libdiffblas_bv.a with $$(ls $(BUILD_DIR)/*_bv.o 2>/dev/null | wc -w) objects"

# Shared variant: linked only when at least one *_bv.o exists; otherwise an
# empty placeholder file is touched so the target still exists.
$(BUILD_DIR)/libdiffblas_bv.so: compile-bv $(DIFFSIZES_ACCESS_OBJ)
	@objs="$$(ls $(BUILD_DIR)/*_bv.o 2>/dev/null)"; \
	if [ -n "$$objs" ]; then \
	  $(FC) -shared -o $@ $$objs $(BUILD_DIR)/adStack.o $(BUILD_DIR)/DIFFSIZES.o $(DIFFSIZES_ACCESS_OBJ); \
	else \
	  touch $@; \
	fi

# Note: Original BLAS functions come from $(BLAS_LIB) (librefblas in LAPACKDIR)
# No need to build a separate liborigblas
Expand Down Expand Up @@ -366,6 +382,7 @@ $(BUILD_DIR)/test_%_vector_reverse.o: $(TEST_DIR)/test_%_vector_reverse.f90 $(BU
# Remove build products: the whole $(BUILD_DIR) tree, plus any .mod files
# sitting in the directory make is run from.
clean:
@echo "Cleaning build directory..."
rm -rf $(BUILD_DIR)
rm -f *.mod
@echo "Clean complete."

# Rebuild everything
Expand Down
46 changes: 46 additions & 0 deletions BLAS/docs/TOLERANCES.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Differentiation test tolerances

Tolerances and step sizes for finite-difference derivative checks in the BLAS differentiation test generator.

---

## Defaults

### rtol/atol by precision family

| Family | Meaning | rtol | atol |
|--------|---------|------|------|
| S | `S*` (single real) | 2.0e-3 | 2.0e-3 |
| C | `C*` (single complex) | 1.0e-3 | 1.0e-3 |
| D | `D*` (double real) | 1.0e-5 | 1.0e-5 |
| Z | `Z*` (double complex) | 1.0e-5 | 1.0e-5 |

### step size h by precision family

| Family | h |
|--------|---|
| S, C | 1.0e-3 |
| D, Z | 1.0e-7 |

---

## Overrides

### Mixed-precision D* (single-precision first differentiable input)

Applies when a `D*` routine produces double-precision output but its first differentiable input is single precision (e.g. `DSDOT`, whose first differentiable input is **SX**; the generator also treats **SY** and **SB** as single-precision inputs for `D*` routines).

- **Scalar forward**: override **h = 1.0e-3** (rtol/atol remain `D*` base = 1.0e-5)
- **Scalar reverse / vector forward / vector reverse**: override **h = 1.0e-3**, **rtol = atol = 2.0e-3**

### Relaxed C* tolerance in vector reverse

Only for **single-precision complex** (`C*`) **vector reverse** tests:

| Routine family (examples) | rtol/atol |
|---------------------------|-----------|
| DOT (e.g. `CDOTC`) | 2.5e-2 |
| BLAS3 (e.g. `CGEMM`, `CSYMM`, `CHEMM`) | 1.0e-2 |
| BLAS2 banded MV (e.g. `CGBMV`, `CTBMV`, `CHBMV`) | 1.0e-2 |

All other `C*` modes use the base tolerance (1.0e-3). `Z*` does not use relaxed tolerances.
163 changes: 162 additions & 1 deletion BLAS/include/DIFFSIZES.f90
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,121 @@ MODULE DIFFSIZES
IMPLICIT NONE
INTEGER, PARAMETER :: nbdirsmax = 4
! ISIZE* are module variables (set via set_ISIZE*(), read via get_ISIZE*() or use directly after check)
! Every ISIZE variable is declared exactly once (a repeated declaration of the same
! name is rejected by the compiler). The initial value -1 marks "not set": the
! check_*_initialized routines stop the program while a value is negative.
INTEGER, SAVE :: isize1ofap = -1
INTEGER, SAVE :: isize1ofcx = -1
INTEGER, SAVE :: isize1ofcy = -1
INTEGER, SAVE :: isize1ofdx = -1
INTEGER, SAVE :: isize1ofdy = -1
INTEGER, SAVE :: isize1ofsx = -1
INTEGER, SAVE :: isize1ofsy = -1
INTEGER, SAVE :: isize1ofx = -1
INTEGER, SAVE :: isize1ofy = -1
INTEGER, SAVE :: isize1ofzx = -1
INTEGER, SAVE :: isize1ofzy = -1
INTEGER, SAVE :: isize2ofa = -1
INTEGER, SAVE :: isize2ofb = -1
CONTAINS
! Store val in the module variable isize1ofap.
SUBROUTINE set_ISIZE1OFAp(val)
  INTEGER, INTENT(IN) :: val
  isize1ofap = val
END SUBROUTINE set_ISIZE1OFAp

! Return the current value of isize1ofap.
FUNCTION get_ISIZE1OFAp() RESULT(current_size)
  INTEGER :: current_size
  current_size = isize1ofap
END FUNCTION get_ISIZE1OFAp

! Do nothing when isize1ofap is non-negative; otherwise print an error
! message to the default output unit and stop with code 1.
SUBROUTINE check_ISIZE1OFAp_initialized()
  IF (isize1ofap >= 0) RETURN
  WRITE(*,'(A)') 'Error: isize1ofap not set. Call set_ISIZE1OFAp before differentiated routine.'
  STOP 1
END SUBROUTINE check_ISIZE1OFAp_initialized

! Store val in the module variable isize1ofcx.
SUBROUTINE set_ISIZE1OFCx(val)
  INTEGER, INTENT(IN) :: val
  isize1ofcx = val
END SUBROUTINE set_ISIZE1OFCx

! Return the current value of isize1ofcx.
FUNCTION get_ISIZE1OFCx() RESULT(current_size)
  INTEGER :: current_size
  current_size = isize1ofcx
END FUNCTION get_ISIZE1OFCx

! Do nothing when isize1ofcx is non-negative; otherwise print an error
! message to the default output unit and stop with code 1.
SUBROUTINE check_ISIZE1OFCx_initialized()
  IF (isize1ofcx >= 0) RETURN
  WRITE(*,'(A)') 'Error: isize1ofcx not set. Call set_ISIZE1OFCx before differentiated routine.'
  STOP 1
END SUBROUTINE check_ISIZE1OFCx_initialized

! Store val in the module variable isize1ofcy.
SUBROUTINE set_ISIZE1OFCy(val)
  INTEGER, INTENT(IN) :: val
  isize1ofcy = val
END SUBROUTINE set_ISIZE1OFCy

! Return the current value of isize1ofcy.
FUNCTION get_ISIZE1OFCy() RESULT(current_size)
  INTEGER :: current_size
  current_size = isize1ofcy
END FUNCTION get_ISIZE1OFCy

! Do nothing when isize1ofcy is non-negative; otherwise print an error
! message to the default output unit and stop with code 1.
SUBROUTINE check_ISIZE1OFCy_initialized()
  IF (isize1ofcy >= 0) RETURN
  WRITE(*,'(A)') 'Error: isize1ofcy not set. Call set_ISIZE1OFCy before differentiated routine.'
  STOP 1
END SUBROUTINE check_ISIZE1OFCy_initialized

! Store val in the module variable isize1ofdx.
SUBROUTINE set_ISIZE1OFDx(val)
  INTEGER, INTENT(IN) :: val
  isize1ofdx = val
END SUBROUTINE set_ISIZE1OFDx

! Return the current value of isize1ofdx.
FUNCTION get_ISIZE1OFDx() RESULT(current_size)
  INTEGER :: current_size
  current_size = isize1ofdx
END FUNCTION get_ISIZE1OFDx

! Do nothing when isize1ofdx is non-negative; otherwise print an error
! message to the default output unit and stop with code 1.
SUBROUTINE check_ISIZE1OFDx_initialized()
  IF (isize1ofdx >= 0) RETURN
  WRITE(*,'(A)') 'Error: isize1ofdx not set. Call set_ISIZE1OFDx before differentiated routine.'
  STOP 1
END SUBROUTINE check_ISIZE1OFDx_initialized

! Store val in the module variable isize1ofdy.
SUBROUTINE set_ISIZE1OFDy(val)
  INTEGER, INTENT(IN) :: val
  isize1ofdy = val
END SUBROUTINE set_ISIZE1OFDy

! Return the current value of isize1ofdy.
FUNCTION get_ISIZE1OFDy() RESULT(current_size)
  INTEGER :: current_size
  current_size = isize1ofdy
END FUNCTION get_ISIZE1OFDy

! Do nothing when isize1ofdy is non-negative; otherwise print an error
! message to the default output unit and stop with code 1.
SUBROUTINE check_ISIZE1OFDy_initialized()
  IF (isize1ofdy >= 0) RETURN
  WRITE(*,'(A)') 'Error: isize1ofdy not set. Call set_ISIZE1OFDy before differentiated routine.'
  STOP 1
END SUBROUTINE check_ISIZE1OFDy_initialized

! Store val in the module variable isize1ofsx.
SUBROUTINE set_ISIZE1OFSx(val)
  INTEGER, INTENT(IN) :: val
  isize1ofsx = val
END SUBROUTINE set_ISIZE1OFSx

! Return the current value of isize1ofsx.
FUNCTION get_ISIZE1OFSx() RESULT(current_size)
  INTEGER :: current_size
  current_size = isize1ofsx
END FUNCTION get_ISIZE1OFSx

! Do nothing when isize1ofsx is non-negative; otherwise print an error
! message to the default output unit and stop with code 1.
SUBROUTINE check_ISIZE1OFSx_initialized()
  IF (isize1ofsx >= 0) RETURN
  WRITE(*,'(A)') 'Error: isize1ofsx not set. Call set_ISIZE1OFSx before differentiated routine.'
  STOP 1
END SUBROUTINE check_ISIZE1OFSx_initialized

! Store val in the module variable isize1ofsy.
SUBROUTINE set_ISIZE1OFSy(val)
  INTEGER, INTENT(IN) :: val
  isize1ofsy = val
END SUBROUTINE set_ISIZE1OFSy

! Return the current value of isize1ofsy.
FUNCTION get_ISIZE1OFSy() RESULT(current_size)
  INTEGER :: current_size
  current_size = isize1ofsy
END FUNCTION get_ISIZE1OFSy

! Do nothing when isize1ofsy is non-negative; otherwise print an error
! message to the default output unit and stop with code 1.
SUBROUTINE check_ISIZE1OFSy_initialized()
  IF (isize1ofsy >= 0) RETURN
  WRITE(*,'(A)') 'Error: isize1ofsy not set. Call set_ISIZE1OFSy before differentiated routine.'
  STOP 1
END SUBROUTINE check_ISIZE1OFSy_initialized

SUBROUTINE set_ISIZE1OFX(val)
INTEGER, INTENT(IN) :: val
isize1ofx = val
Expand Down Expand Up @@ -36,6 +149,38 @@ SUBROUTINE check_ISIZE1OFY_initialized()
END IF
END SUBROUTINE check_ISIZE1OFY_initialized

! Store val in the module variable isize1ofzx.
SUBROUTINE set_ISIZE1OFZx(val)
  INTEGER, INTENT(IN) :: val
  isize1ofzx = val
END SUBROUTINE set_ISIZE1OFZx

! Return the current value of isize1ofzx.
FUNCTION get_ISIZE1OFZx() RESULT(current_size)
  INTEGER :: current_size
  current_size = isize1ofzx
END FUNCTION get_ISIZE1OFZx

! Do nothing when isize1ofzx is non-negative; otherwise print an error
! message to the default output unit and stop with code 1.
SUBROUTINE check_ISIZE1OFZx_initialized()
  IF (isize1ofzx >= 0) RETURN
  WRITE(*,'(A)') 'Error: isize1ofzx not set. Call set_ISIZE1OFZx before differentiated routine.'
  STOP 1
END SUBROUTINE check_ISIZE1OFZx_initialized

! Store val in the module variable isize1ofzy.
SUBROUTINE set_ISIZE1OFZy(val)
  INTEGER, INTENT(IN) :: val
  isize1ofzy = val
END SUBROUTINE set_ISIZE1OFZy

! Return the current value of isize1ofzy.
FUNCTION get_ISIZE1OFZy() RESULT(current_size)
  INTEGER :: current_size
  current_size = isize1ofzy
END FUNCTION get_ISIZE1OFZy

! Do nothing when isize1ofzy is non-negative; otherwise print an error
! message to the default output unit and stop with code 1.
SUBROUTINE check_ISIZE1OFZy_initialized()
  IF (isize1ofzy >= 0) RETURN
  WRITE(*,'(A)') 'Error: isize1ofzy not set. Call set_ISIZE1OFZy before differentiated routine.'
  STOP 1
END SUBROUTINE check_ISIZE1OFZy_initialized

SUBROUTINE set_ISIZE2OFA(val)
INTEGER, INTENT(IN) :: val
isize2ofa = val
Expand All @@ -52,4 +197,20 @@ SUBROUTINE check_ISIZE2OFA_initialized()
END IF
END SUBROUTINE check_ISIZE2OFA_initialized

! Store val in the module variable isize2ofb.
SUBROUTINE set_ISIZE2OFB(val)
  INTEGER, INTENT(IN) :: val
  isize2ofb = val
END SUBROUTINE set_ISIZE2OFB

! Return the current value of isize2ofb.
FUNCTION get_ISIZE2OFB() RESULT(current_size)
  INTEGER :: current_size
  current_size = isize2ofb
END FUNCTION get_ISIZE2OFB

! Do nothing when isize2ofb is non-negative; otherwise print an error
! message to the default output unit and stop with code 1.
SUBROUTINE check_ISIZE2OFB_initialized()
  IF (isize2ofb >= 0) RETURN
  WRITE(*,'(A)') 'Error: isize2ofb not set. Call set_ISIZE2OFB before differentiated routine.'
  STOP 1
END SUBROUTINE check_ISIZE2OFB_initialized

END MODULE DIFFSIZES
Loading
Loading