Updates
This commit is contained in:
18
fftw-3.3.10/libbench2/Makefile.am
Normal file
18
fftw-3.3.10/libbench2/Makefile.am
Normal file
@@ -0,0 +1,18 @@
|
||||
# Add the top of the source tree to the include path; the C files in this
# directory include their headers as "libbench2/...".
AM_CPPFLAGS = -I $(top_srcdir)

# Static library built in this directory (noinst_: listed as not installed).
noinst_LIBRARIES = libbench2.a

# Sources and headers of libbench2.a, one per line, in their original order.
libbench2_a_SOURCES = \
  after-ccopy-from.c \
  after-ccopy-to.c \
  after-hccopy-from.c \
  after-hccopy-to.c \
  after-rcopy-from.c \
  after-rcopy-to.c \
  allocate.c \
  aset.c \
  bench-cost-postprocess.c \
  bench-exit.c \
  bench-main.c \
  can-do.c \
  caset.c \
  dotens2.c \
  info.c \
  main.c \
  mflops.c \
  mp.c \
  ovtpvt.c \
  pow2.c \
  problem.c \
  report.c \
  speed.c \
  tensor.c \
  timer.c \
  useropt.c \
  util.c \
  verify-dft.c \
  verify-lib.c \
  verify-r2r.c \
  verify-rdft2.c \
  verify.c \
  zero.c \
  bench-user.h \
  bench.h \
  verify.h \
  my-getopt.c \
  my-getopt.h
|
||||
|
||||
# Neither target creates a file of its own name.  Declaring them phony keeps
# a stray file called "benchmark" or "accuracy" from making them look up to
# date and silently skipping the "all" prerequisite.
.PHONY: benchmark accuracy

# "make benchmark": build everything in this directory, then report that
# there is nothing here to run.
benchmark: all
	@echo "nothing to benchmark"

# "make accuracy": build everything in this directory, then report that
# there is nothing here to check (the message used to repeat the
# benchmark text).
accuracy: all
	@echo "nothing to check for accuracy"
|
||||
|
||||
778
fftw-3.3.10/libbench2/Makefile.in
Normal file
778
fftw-3.3.10/libbench2/Makefile.in
Normal file
@@ -0,0 +1,778 @@
|
||||
# Makefile.in generated by automake 1.16.3 from Makefile.am.
|
||||
# @configure_input@
|
||||
|
||||
# Copyright (C) 1994-2020 Free Software Foundation, Inc.
|
||||
|
||||
# This Makefile.in is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
# with or without modifications, as long as this notice is preserved.
|
||||
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
|
||||
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
|
||||
# PARTICULAR PURPOSE.
|
||||
|
||||
@SET_MAKE@
|
||||
|
||||
VPATH = @srcdir@
|
||||
# Shell snippet used as a condition: it fails when MAKELEVEL is empty;
# otherwise it succeeds if MAKE_HOST is non-empty, or if both MAKE_VERSION
# and CURDIR are non-empty, and fails in every other case.
am__is_gnu_make = { \
|
||||
if test -z '$(MAKELEVEL)'; then \
|
||||
false; \
|
||||
elif test -n '$(MAKE_HOST)'; then \
|
||||
true; \
|
||||
elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
|
||||
true; \
|
||||
else \
|
||||
false; \
|
||||
fi; \
|
||||
}
|
||||
am__make_running_with_option = \
|
||||
case $${target_option-} in \
|
||||
?) ;; \
|
||||
*) echo "am__make_running_with_option: internal error: invalid" \
|
||||
"target option '$${target_option-}' specified" >&2; \
|
||||
exit 1;; \
|
||||
esac; \
|
||||
has_opt=no; \
|
||||
sane_makeflags=$$MAKEFLAGS; \
|
||||
if $(am__is_gnu_make); then \
|
||||
sane_makeflags=$$MFLAGS; \
|
||||
else \
|
||||
case $$MAKEFLAGS in \
|
||||
*\\[\ \ ]*) \
|
||||
bs=\\; \
|
||||
sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
|
||||
| sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \
|
||||
esac; \
|
||||
fi; \
|
||||
skip_next=no; \
|
||||
strip_trailopt () \
|
||||
{ \
|
||||
flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
|
||||
}; \
|
||||
for flg in $$sane_makeflags; do \
|
||||
test $$skip_next = yes && { skip_next=no; continue; }; \
|
||||
case $$flg in \
|
||||
*=*|--*) continue;; \
|
||||
-*I) strip_trailopt 'I'; skip_next=yes;; \
|
||||
-*I?*) strip_trailopt 'I';; \
|
||||
-*O) strip_trailopt 'O'; skip_next=yes;; \
|
||||
-*O?*) strip_trailopt 'O';; \
|
||||
-*l) strip_trailopt 'l'; skip_next=yes;; \
|
||||
-*l?*) strip_trailopt 'l';; \
|
||||
-[dEDm]) skip_next=yes;; \
|
||||
-[JT]) skip_next=yes;; \
|
||||
esac; \
|
||||
case $$flg in \
|
||||
*$$target_option*) has_opt=yes; break;; \
|
||||
esac; \
|
||||
done; \
|
||||
test $$has_opt = yes
|
||||
am__make_dryrun = (target_option=n; $(am__make_running_with_option))
|
||||
am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
|
||||
pkgdatadir = $(datadir)/@PACKAGE@
|
||||
pkgincludedir = $(includedir)/@PACKAGE@
|
||||
pkglibdir = $(libdir)/@PACKAGE@
|
||||
pkglibexecdir = $(libexecdir)/@PACKAGE@
|
||||
am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
|
||||
install_sh_DATA = $(install_sh) -c -m 644
|
||||
install_sh_PROGRAM = $(install_sh) -c
|
||||
install_sh_SCRIPT = $(install_sh) -c
|
||||
INSTALL_HEADER = $(INSTALL_DATA)
|
||||
transform = $(program_transform_name)
|
||||
NORMAL_INSTALL = :
|
||||
PRE_INSTALL = :
|
||||
POST_INSTALL = :
|
||||
NORMAL_UNINSTALL = :
|
||||
PRE_UNINSTALL = :
|
||||
POST_UNINSTALL = :
|
||||
build_triplet = @build@
|
||||
host_triplet = @host@
|
||||
subdir = libbench2
|
||||
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
|
||||
am__aclocal_m4_deps = $(top_srcdir)/m4/acx_mpi.m4 \
|
||||
$(top_srcdir)/m4/acx_pthread.m4 \
|
||||
$(top_srcdir)/m4/ax_cc_maxopt.m4 \
|
||||
$(top_srcdir)/m4/ax_check_compiler_flags.m4 \
|
||||
$(top_srcdir)/m4/ax_compiler_vendor.m4 \
|
||||
$(top_srcdir)/m4/ax_gcc_aligns_stack.m4 \
|
||||
$(top_srcdir)/m4/ax_gcc_version.m4 \
|
||||
$(top_srcdir)/m4/ax_openmp.m4 $(top_srcdir)/m4/libtool.m4 \
|
||||
$(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \
|
||||
$(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
|
||||
$(top_srcdir)/configure.ac
|
||||
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
|
||||
$(ACLOCAL_M4)
|
||||
DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON)
|
||||
mkinstalldirs = $(install_sh) -d
|
||||
CONFIG_HEADER = $(top_builddir)/config.h
|
||||
CONFIG_CLEAN_FILES =
|
||||
CONFIG_CLEAN_VPATH_FILES =
|
||||
LIBRARIES = $(noinst_LIBRARIES)
|
||||
# Flags for $(AR): c = create the archive without complaint, r = insert the
# members.  The former "u" (only replace members that are newer) is dropped:
# the libbench2.a rule removes the archive before running ar, so "u" never
# had any effect, and ar builds that default to deterministic mode print a
# warning that the modifier is ignored.
ARFLAGS = cr
|
||||
AM_V_AR = $(am__v_AR_@AM_V@)
|
||||
am__v_AR_ = $(am__v_AR_@AM_DEFAULT_V@)
|
||||
am__v_AR_0 = @echo " AR " $@;
|
||||
am__v_AR_1 =
|
||||
libbench2_a_AR = $(AR) $(ARFLAGS)
|
||||
libbench2_a_LIBADD =
|
||||
am_libbench2_a_OBJECTS = after-ccopy-from.$(OBJEXT) \
|
||||
after-ccopy-to.$(OBJEXT) after-hccopy-from.$(OBJEXT) \
|
||||
after-hccopy-to.$(OBJEXT) after-rcopy-from.$(OBJEXT) \
|
||||
after-rcopy-to.$(OBJEXT) allocate.$(OBJEXT) aset.$(OBJEXT) \
|
||||
bench-cost-postprocess.$(OBJEXT) bench-exit.$(OBJEXT) \
|
||||
bench-main.$(OBJEXT) can-do.$(OBJEXT) caset.$(OBJEXT) \
|
||||
dotens2.$(OBJEXT) info.$(OBJEXT) main.$(OBJEXT) \
|
||||
mflops.$(OBJEXT) mp.$(OBJEXT) ovtpvt.$(OBJEXT) pow2.$(OBJEXT) \
|
||||
problem.$(OBJEXT) report.$(OBJEXT) speed.$(OBJEXT) \
|
||||
tensor.$(OBJEXT) timer.$(OBJEXT) useropt.$(OBJEXT) \
|
||||
util.$(OBJEXT) verify-dft.$(OBJEXT) verify-lib.$(OBJEXT) \
|
||||
verify-r2r.$(OBJEXT) verify-rdft2.$(OBJEXT) verify.$(OBJEXT) \
|
||||
zero.$(OBJEXT) my-getopt.$(OBJEXT)
|
||||
libbench2_a_OBJECTS = $(am_libbench2_a_OBJECTS)
|
||||
AM_V_P = $(am__v_P_@AM_V@)
|
||||
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
|
||||
am__v_P_0 = false
|
||||
am__v_P_1 = :
|
||||
AM_V_GEN = $(am__v_GEN_@AM_V@)
|
||||
am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
|
||||
am__v_GEN_0 = @echo " GEN " $@;
|
||||
am__v_GEN_1 =
|
||||
AM_V_at = $(am__v_at_@AM_V@)
|
||||
am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
|
||||
am__v_at_0 = @
|
||||
am__v_at_1 =
|
||||
DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
|
||||
depcomp = $(SHELL) $(top_srcdir)/depcomp
|
||||
am__maybe_remake_depfiles = depfiles
|
||||
am__depfiles_remade = ./$(DEPDIR)/after-ccopy-from.Po \
|
||||
./$(DEPDIR)/after-ccopy-to.Po ./$(DEPDIR)/after-hccopy-from.Po \
|
||||
./$(DEPDIR)/after-hccopy-to.Po ./$(DEPDIR)/after-rcopy-from.Po \
|
||||
./$(DEPDIR)/after-rcopy-to.Po ./$(DEPDIR)/allocate.Po \
|
||||
./$(DEPDIR)/aset.Po ./$(DEPDIR)/bench-cost-postprocess.Po \
|
||||
./$(DEPDIR)/bench-exit.Po ./$(DEPDIR)/bench-main.Po \
|
||||
./$(DEPDIR)/can-do.Po ./$(DEPDIR)/caset.Po \
|
||||
./$(DEPDIR)/dotens2.Po ./$(DEPDIR)/info.Po ./$(DEPDIR)/main.Po \
|
||||
./$(DEPDIR)/mflops.Po ./$(DEPDIR)/mp.Po \
|
||||
./$(DEPDIR)/my-getopt.Po ./$(DEPDIR)/ovtpvt.Po \
|
||||
./$(DEPDIR)/pow2.Po ./$(DEPDIR)/problem.Po \
|
||||
./$(DEPDIR)/report.Po ./$(DEPDIR)/speed.Po \
|
||||
./$(DEPDIR)/tensor.Po ./$(DEPDIR)/timer.Po \
|
||||
./$(DEPDIR)/useropt.Po ./$(DEPDIR)/util.Po \
|
||||
./$(DEPDIR)/verify-dft.Po ./$(DEPDIR)/verify-lib.Po \
|
||||
./$(DEPDIR)/verify-r2r.Po ./$(DEPDIR)/verify-rdft2.Po \
|
||||
./$(DEPDIR)/verify.Po ./$(DEPDIR)/zero.Po
|
||||
am__mv = mv -f
|
||||
COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
|
||||
$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
|
||||
AM_V_lt = $(am__v_lt_@AM_V@)
|
||||
am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
|
||||
am__v_lt_0 = --silent
|
||||
am__v_lt_1 =
|
||||
LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
|
||||
$(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \
|
||||
$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
|
||||
$(AM_CFLAGS) $(CFLAGS)
|
||||
AM_V_CC = $(am__v_CC_@AM_V@)
|
||||
am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
|
||||
am__v_CC_0 = @echo " CC " $@;
|
||||
am__v_CC_1 =
|
||||
CCLD = $(CC)
|
||||
LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
|
||||
$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
|
||||
$(AM_LDFLAGS) $(LDFLAGS) -o $@
|
||||
AM_V_CCLD = $(am__v_CCLD_@AM_V@)
|
||||
am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
|
||||
am__v_CCLD_0 = @echo " CCLD " $@;
|
||||
am__v_CCLD_1 =
|
||||
SOURCES = $(libbench2_a_SOURCES)
|
||||
DIST_SOURCES = $(libbench2_a_SOURCES)
|
||||
# Shell test: fails when the environment variable AM_UPDATE_INFO_DIR is
# n, no or NO; otherwise succeeds only if "install-info --version" can be
# run successfully (its output is discarded).
am__can_run_installinfo = \
|
||||
case $$AM_UPDATE_INFO_DIR in \
|
||||
n|no|NO) false;; \
|
||||
*) (install-info --version) >/dev/null 2>&1;; \
|
||||
esac
|
||||
am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
|
||||
# Read a list of newline-separated strings from the standard input,
|
||||
# and print each of them once, without duplicates. Input order is
|
||||
# *not* preserved.
|
||||
am__uniquify_input = $(AWK) '\
|
||||
BEGIN { nonempty = 0; } \
|
||||
{ items[$$0] = 1; nonempty = 1; } \
|
||||
END { if (nonempty) { for (i in items) print i; }; } \
|
||||
'
|
||||
# Make sure the list of sources is unique. This is necessary because,
|
||||
# e.g., the same source file might be shared among _SOURCES variables
|
||||
# for different programs/libraries.
|
||||
am__define_uniq_tagged_files = \
|
||||
list='$(am__tagged_files)'; \
|
||||
unique=`for i in $$list; do \
|
||||
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
|
||||
done | $(am__uniquify_input)`
|
||||
ETAGS = etags
|
||||
CTAGS = ctags
|
||||
am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/depcomp
|
||||
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
|
||||
ACLOCAL = @ACLOCAL@
|
||||
ALLOCA = @ALLOCA@
|
||||
ALTIVEC_CFLAGS = @ALTIVEC_CFLAGS@
|
||||
AMTAR = @AMTAR@
|
||||
AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
|
||||
AR = @AR@
|
||||
AS = @AS@
|
||||
AUTOCONF = @AUTOCONF@
|
||||
AUTOHEADER = @AUTOHEADER@
|
||||
AUTOMAKE = @AUTOMAKE@
|
||||
AVX2_CFLAGS = @AVX2_CFLAGS@
|
||||
AVX512_CFLAGS = @AVX512_CFLAGS@
|
||||
AVX_128_FMA_CFLAGS = @AVX_128_FMA_CFLAGS@
|
||||
AVX_CFLAGS = @AVX_CFLAGS@
|
||||
AWK = @AWK@
|
||||
CC = @CC@
|
||||
CCDEPMODE = @CCDEPMODE@
|
||||
CFLAGS = @CFLAGS@
|
||||
CHECK_PL_OPTS = @CHECK_PL_OPTS@
|
||||
CPP = @CPP@
|
||||
CPPFLAGS = @CPPFLAGS@
|
||||
CYGPATH_W = @CYGPATH_W@
|
||||
C_FFTW_R2R_KIND = @C_FFTW_R2R_KIND@
|
||||
C_MPI_FINT = @C_MPI_FINT@
|
||||
DEFS = @DEFS@
|
||||
DEPDIR = @DEPDIR@
|
||||
DLLTOOL = @DLLTOOL@
|
||||
DSYMUTIL = @DSYMUTIL@
|
||||
DUMPBIN = @DUMPBIN@
|
||||
ECHO_C = @ECHO_C@
|
||||
ECHO_N = @ECHO_N@
|
||||
ECHO_T = @ECHO_T@
|
||||
EGREP = @EGREP@
|
||||
EXEEXT = @EXEEXT@
|
||||
F77 = @F77@
|
||||
FFLAGS = @FFLAGS@
|
||||
FGREP = @FGREP@
|
||||
FLIBS = @FLIBS@
|
||||
GREP = @GREP@
|
||||
INDENT = @INDENT@
|
||||
INSTALL = @INSTALL@
|
||||
INSTALL_DATA = @INSTALL_DATA@
|
||||
INSTALL_PROGRAM = @INSTALL_PROGRAM@
|
||||
INSTALL_SCRIPT = @INSTALL_SCRIPT@
|
||||
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
|
||||
KCVI_CFLAGS = @KCVI_CFLAGS@
|
||||
LD = @LD@
|
||||
LDFLAGS = @LDFLAGS@
|
||||
LIBOBJS = @LIBOBJS@
|
||||
LIBQUADMATH = @LIBQUADMATH@
|
||||
LIBS = @LIBS@
|
||||
LIBTOOL = @LIBTOOL@
|
||||
LIPO = @LIPO@
|
||||
LN_S = @LN_S@
|
||||
LTLIBOBJS = @LTLIBOBJS@
|
||||
LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@
|
||||
MAINT = @MAINT@
|
||||
MAKEINFO = @MAKEINFO@
|
||||
MANIFEST_TOOL = @MANIFEST_TOOL@
|
||||
MKDIR_P = @MKDIR_P@
|
||||
MPICC = @MPICC@
|
||||
MPILIBS = @MPILIBS@
|
||||
MPIRUN = @MPIRUN@
|
||||
NEON_CFLAGS = @NEON_CFLAGS@
|
||||
NM = @NM@
|
||||
NMEDIT = @NMEDIT@
|
||||
OBJDUMP = @OBJDUMP@
|
||||
OBJEXT = @OBJEXT@
|
||||
OCAMLBUILD = @OCAMLBUILD@
|
||||
OPENMP_CFLAGS = @OPENMP_CFLAGS@
|
||||
OTOOL = @OTOOL@
|
||||
OTOOL64 = @OTOOL64@
|
||||
PACKAGE = @PACKAGE@
|
||||
PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
|
||||
PACKAGE_NAME = @PACKAGE_NAME@
|
||||
PACKAGE_STRING = @PACKAGE_STRING@
|
||||
PACKAGE_TARNAME = @PACKAGE_TARNAME@
|
||||
PACKAGE_URL = @PACKAGE_URL@
|
||||
PACKAGE_VERSION = @PACKAGE_VERSION@
|
||||
PATH_SEPARATOR = @PATH_SEPARATOR@
|
||||
POW_LIB = @POW_LIB@
|
||||
PRECISION = @PRECISION@
|
||||
PREC_SUFFIX = @PREC_SUFFIX@
|
||||
PTHREAD_CC = @PTHREAD_CC@
|
||||
PTHREAD_CFLAGS = @PTHREAD_CFLAGS@
|
||||
PTHREAD_LIBS = @PTHREAD_LIBS@
|
||||
RANLIB = @RANLIB@
|
||||
SED = @SED@
|
||||
SET_MAKE = @SET_MAKE@
|
||||
SHARED_VERSION_INFO = @SHARED_VERSION_INFO@
|
||||
SHELL = @SHELL@
|
||||
SSE2_CFLAGS = @SSE2_CFLAGS@
|
||||
STACK_ALIGN_CFLAGS = @STACK_ALIGN_CFLAGS@
|
||||
STRIP = @STRIP@
|
||||
THREADLIBS = @THREADLIBS@
|
||||
VERSION = @VERSION@
|
||||
VSX_CFLAGS = @VSX_CFLAGS@
|
||||
abs_builddir = @abs_builddir@
|
||||
abs_srcdir = @abs_srcdir@
|
||||
abs_top_builddir = @abs_top_builddir@
|
||||
abs_top_srcdir = @abs_top_srcdir@
|
||||
ac_ct_AR = @ac_ct_AR@
|
||||
ac_ct_CC = @ac_ct_CC@
|
||||
ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
|
||||
ac_ct_F77 = @ac_ct_F77@
|
||||
acx_pthread_config = @acx_pthread_config@
|
||||
am__include = @am__include@
|
||||
am__leading_dot = @am__leading_dot@
|
||||
am__quote = @am__quote@
|
||||
am__tar = @am__tar@
|
||||
am__untar = @am__untar@
|
||||
bindir = @bindir@
|
||||
build = @build@
|
||||
build_alias = @build_alias@
|
||||
build_cpu = @build_cpu@
|
||||
build_os = @build_os@
|
||||
build_vendor = @build_vendor@
|
||||
builddir = @builddir@
|
||||
datadir = @datadir@
|
||||
datarootdir = @datarootdir@
|
||||
docdir = @docdir@
|
||||
dvidir = @dvidir@
|
||||
exec_prefix = @exec_prefix@
|
||||
host = @host@
|
||||
host_alias = @host_alias@
|
||||
host_cpu = @host_cpu@
|
||||
host_os = @host_os@
|
||||
host_vendor = @host_vendor@
|
||||
htmldir = @htmldir@
|
||||
includedir = @includedir@
|
||||
infodir = @infodir@
|
||||
install_sh = @install_sh@
|
||||
libdir = @libdir@
|
||||
libexecdir = @libexecdir@
|
||||
localedir = @localedir@
|
||||
localstatedir = @localstatedir@
|
||||
mandir = @mandir@
|
||||
mkdir_p = @mkdir_p@
|
||||
oldincludedir = @oldincludedir@
|
||||
pdfdir = @pdfdir@
|
||||
prefix = @prefix@
|
||||
program_transform_name = @program_transform_name@
|
||||
psdir = @psdir@
|
||||
runstatedir = @runstatedir@
|
||||
sbindir = @sbindir@
|
||||
sharedstatedir = @sharedstatedir@
|
||||
srcdir = @srcdir@
|
||||
sysconfdir = @sysconfdir@
|
||||
target_alias = @target_alias@
|
||||
top_build_prefix = @top_build_prefix@
|
||||
top_builddir = @top_builddir@
|
||||
top_srcdir = @top_srcdir@
|
||||
AM_CPPFLAGS = -I $(top_srcdir)
|
||||
noinst_LIBRARIES = libbench2.a
|
||||
libbench2_a_SOURCES = after-ccopy-from.c after-ccopy-to.c \
|
||||
after-hccopy-from.c after-hccopy-to.c after-rcopy-from.c \
|
||||
after-rcopy-to.c allocate.c aset.c bench-cost-postprocess.c \
|
||||
bench-exit.c bench-main.c can-do.c caset.c dotens2.c info.c main.c \
|
||||
mflops.c mp.c ovtpvt.c pow2.c problem.c report.c speed.c tensor.c \
|
||||
timer.c useropt.c util.c verify-dft.c verify-lib.c verify-r2r.c \
|
||||
verify-rdft2.c verify.c zero.c bench-user.h bench.h verify.h \
|
||||
my-getopt.c my-getopt.h
|
||||
|
||||
all: all-am
|
||||
|
||||
.SUFFIXES:
|
||||
.SUFFIXES: .c .lo .o .obj
|
||||
$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
|
||||
@for dep in $?; do \
|
||||
case '$(am__configure_deps)' in \
|
||||
*$$dep*) \
|
||||
( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
|
||||
&& { if test -f $@; then exit 0; else break; fi; }; \
|
||||
exit 1;; \
|
||||
esac; \
|
||||
done; \
|
||||
echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu libbench2/Makefile'; \
|
||||
$(am__cd) $(top_srcdir) && \
|
||||
$(AUTOMAKE) --gnu libbench2/Makefile
|
||||
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
|
||||
@case '$?' in \
|
||||
*config.status*) \
|
||||
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
|
||||
*) \
|
||||
echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \
|
||||
cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \
|
||||
esac;
|
||||
|
||||
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
|
||||
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
|
||||
|
||||
$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
|
||||
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
|
||||
$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
|
||||
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
|
||||
$(am__aclocal_m4_deps):
|
||||
|
||||
# Delete the libraries listed in noinst_LIBRARIES.  Nothing is removed when
# the list is empty, and the leading "-" tells make to ignore a failure of
# this command.
clean-noinstLIBRARIES:
|
||||
-test -z "$(noinst_LIBRARIES)" || rm -f $(noinst_LIBRARIES)
|
||||
|
||||
# Build the static library from its object files.  Any existing archive is
# removed first (failure ignored via "-") so that ar starts from scratch,
# then the objects and $(libbench2_a_LIBADD) are archived with
# $(libbench2_a_AR), and finally $(RANLIB) is run on the result.
libbench2.a: $(libbench2_a_OBJECTS) $(libbench2_a_DEPENDENCIES) $(EXTRA_libbench2_a_DEPENDENCIES)
|
||||
$(AM_V_at)-rm -f libbench2.a
|
||||
$(AM_V_AR)$(libbench2_a_AR) libbench2.a $(libbench2_a_OBJECTS) $(libbench2_a_LIBADD)
|
||||
$(AM_V_at)$(RANLIB) libbench2.a
|
||||
|
||||
mostlyclean-compile:
|
||||
-rm -f *.$(OBJEXT)
|
||||
|
||||
distclean-compile:
|
||||
-rm -f *.tab.c
|
||||
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/after-ccopy-from.Po@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/after-ccopy-to.Po@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/after-hccopy-from.Po@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/after-hccopy-to.Po@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/after-rcopy-from.Po@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/after-rcopy-to.Po@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/allocate.Po@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/aset.Po@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/bench-cost-postprocess.Po@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/bench-exit.Po@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/bench-main.Po@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/can-do.Po@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/caset.Po@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dotens2.Po@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/info.Po@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/main.Po@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mflops.Po@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mp.Po@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/my-getopt.Po@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ovtpvt.Po@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pow2.Po@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/problem.Po@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/report.Po@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/speed.Po@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tensor.Po@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/timer.Po@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/useropt.Po@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/util.Po@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/verify-dft.Po@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/verify-lib.Po@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/verify-r2r.Po@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/verify-rdft2.Po@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/verify.Po@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/zero.Po@am__quote@ # am--include-marker
|
||||
|
||||
# Create a placeholder for each listed dependency file: make sure its
# directory exists, write the line "# dummy" to a temporary "<name>-t" file,
# and move that file into place with $(am__mv).
$(am__depfiles_remade):
|
||||
@$(MKDIR_P) $(@D)
|
||||
@echo '# dummy' >$@-t && $(am__mv) $@-t $@
|
||||
|
||||
am--depfiles: $(am__depfiles_remade)
|
||||
|
||||
.c.o:
|
||||
@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
|
||||
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
|
||||
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
|
||||
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
|
||||
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $<
|
||||
|
||||
.c.obj:
|
||||
@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
|
||||
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
|
||||
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
|
||||
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
|
||||
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
|
||||
|
||||
.c.lo:
|
||||
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
|
||||
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
|
||||
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
|
||||
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
|
||||
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $<
|
||||
|
||||
mostlyclean-libtool:
|
||||
-rm -f *.lo
|
||||
|
||||
clean-libtool:
|
||||
-rm -rf .libs _libs
|
||||
|
||||
ID: $(am__tagged_files)
|
||||
$(am__define_uniq_tagged_files); mkid -fID $$unique
|
||||
tags: tags-am
|
||||
TAGS: tags
|
||||
|
||||
tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
|
||||
set x; \
|
||||
here=`pwd`; \
|
||||
$(am__define_uniq_tagged_files); \
|
||||
shift; \
|
||||
if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
|
||||
test -n "$$unique" || unique=$$empty_fix; \
|
||||
if test $$# -gt 0; then \
|
||||
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
|
||||
"$$@" $$unique; \
|
||||
else \
|
||||
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
|
||||
$$unique; \
|
||||
fi; \
|
||||
fi
|
||||
ctags: ctags-am
|
||||
|
||||
CTAGS: ctags
|
||||
ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
|
||||
$(am__define_uniq_tagged_files); \
|
||||
test -z "$(CTAGS_ARGS)$$unique" \
|
||||
|| $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
|
||||
$$unique
|
||||
|
||||
GTAGS:
|
||||
here=`$(am__cd) $(top_builddir) && pwd` \
|
||||
&& $(am__cd) $(top_srcdir) \
|
||||
&& gtags -i $(GTAGS_ARGS) "$$here"
|
||||
cscopelist: cscopelist-am
|
||||
|
||||
cscopelist-am: $(am__tagged_files)
|
||||
list='$(am__tagged_files)'; \
|
||||
case "$(srcdir)" in \
|
||||
[\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
|
||||
*) sdir=$(subdir)/$(srcdir) ;; \
|
||||
esac; \
|
||||
for i in $$list; do \
|
||||
if test -f "$$i"; then \
|
||||
echo "$(subdir)/$$i"; \
|
||||
else \
|
||||
echo "$$sdir/$$i"; \
|
||||
fi; \
|
||||
done >> $(top_builddir)/cscope.files
|
||||
|
||||
distclean-tags:
|
||||
-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
|
||||
|
||||
distdir: $(BUILT_SOURCES)
|
||||
$(MAKE) $(AM_MAKEFLAGS) distdir-am
|
||||
|
||||
distdir-am: $(DISTFILES)
|
||||
@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
|
||||
topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
|
||||
list='$(DISTFILES)'; \
|
||||
dist_files=`for file in $$list; do echo $$file; done | \
|
||||
sed -e "s|^$$srcdirstrip/||;t" \
|
||||
-e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
|
||||
case $$dist_files in \
|
||||
*/*) $(MKDIR_P) `echo "$$dist_files" | \
|
||||
sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
|
||||
sort -u` ;; \
|
||||
esac; \
|
||||
for file in $$dist_files; do \
|
||||
if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
|
||||
if test -d $$d/$$file; then \
|
||||
dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
|
||||
if test -d "$(distdir)/$$file"; then \
|
||||
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
|
||||
fi; \
|
||||
if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
|
||||
cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
|
||||
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
|
||||
fi; \
|
||||
cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
|
||||
else \
|
||||
test -f "$(distdir)/$$file" \
|
||||
|| cp -p $$d/$$file "$(distdir)/$$file" \
|
||||
|| exit 1; \
|
||||
fi; \
|
||||
done
|
||||
check-am: all-am
|
||||
check: check-am
|
||||
all-am: Makefile $(LIBRARIES)
|
||||
installdirs:
|
||||
install: install-am
|
||||
install-exec: install-exec-am
|
||||
install-data: install-data-am
|
||||
uninstall: uninstall-am
|
||||
|
||||
install-am: all-am
|
||||
@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
|
||||
|
||||
installcheck: installcheck-am
|
||||
# Re-run the "install" target with $(INSTALL_STRIP_PROGRAM) substituted for
# both INSTALL_PROGRAM and install_sh_PROGRAM and INSTALL_STRIP_FLAG set to
# -s.  When $(STRIP) is non-empty it is additionally handed down as
# STRIPPROG through INSTALL_PROGRAM_ENV.
install-strip:
|
||||
if test -z '$(STRIP)'; then \
|
||||
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
|
||||
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
|
||||
install; \
|
||||
else \
|
||||
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
|
||||
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
|
||||
"INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
|
||||
fi
|
||||
mostlyclean-generic:
|
||||
|
||||
clean-generic:
|
||||
|
||||
distclean-generic:
|
||||
-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
|
||||
-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
|
||||
|
||||
maintainer-clean-generic:
|
||||
@echo "This command is intended for maintainers to use"
|
||||
@echo "it deletes files that may require special tools to rebuild."
|
||||
clean: clean-am
|
||||
|
||||
clean-am: clean-generic clean-libtool clean-noinstLIBRARIES \
|
||||
mostlyclean-am
|
||||
|
||||
distclean: distclean-am
|
||||
-rm -f ./$(DEPDIR)/after-ccopy-from.Po
|
||||
-rm -f ./$(DEPDIR)/after-ccopy-to.Po
|
||||
-rm -f ./$(DEPDIR)/after-hccopy-from.Po
|
||||
-rm -f ./$(DEPDIR)/after-hccopy-to.Po
|
||||
-rm -f ./$(DEPDIR)/after-rcopy-from.Po
|
||||
-rm -f ./$(DEPDIR)/after-rcopy-to.Po
|
||||
-rm -f ./$(DEPDIR)/allocate.Po
|
||||
-rm -f ./$(DEPDIR)/aset.Po
|
||||
-rm -f ./$(DEPDIR)/bench-cost-postprocess.Po
|
||||
-rm -f ./$(DEPDIR)/bench-exit.Po
|
||||
-rm -f ./$(DEPDIR)/bench-main.Po
|
||||
-rm -f ./$(DEPDIR)/can-do.Po
|
||||
-rm -f ./$(DEPDIR)/caset.Po
|
||||
-rm -f ./$(DEPDIR)/dotens2.Po
|
||||
-rm -f ./$(DEPDIR)/info.Po
|
||||
-rm -f ./$(DEPDIR)/main.Po
|
||||
-rm -f ./$(DEPDIR)/mflops.Po
|
||||
-rm -f ./$(DEPDIR)/mp.Po
|
||||
-rm -f ./$(DEPDIR)/my-getopt.Po
|
||||
-rm -f ./$(DEPDIR)/ovtpvt.Po
|
||||
-rm -f ./$(DEPDIR)/pow2.Po
|
||||
-rm -f ./$(DEPDIR)/problem.Po
|
||||
-rm -f ./$(DEPDIR)/report.Po
|
||||
-rm -f ./$(DEPDIR)/speed.Po
|
||||
-rm -f ./$(DEPDIR)/tensor.Po
|
||||
-rm -f ./$(DEPDIR)/timer.Po
|
||||
-rm -f ./$(DEPDIR)/useropt.Po
|
||||
-rm -f ./$(DEPDIR)/util.Po
|
||||
-rm -f ./$(DEPDIR)/verify-dft.Po
|
||||
-rm -f ./$(DEPDIR)/verify-lib.Po
|
||||
-rm -f ./$(DEPDIR)/verify-r2r.Po
|
||||
-rm -f ./$(DEPDIR)/verify-rdft2.Po
|
||||
-rm -f ./$(DEPDIR)/verify.Po
|
||||
-rm -f ./$(DEPDIR)/zero.Po
|
||||
-rm -f Makefile
|
||||
distclean-am: clean-am distclean-compile distclean-generic \
|
||||
distclean-tags
|
||||
|
||||
dvi: dvi-am
|
||||
|
||||
dvi-am:
|
||||
|
||||
html: html-am
|
||||
|
||||
html-am:
|
||||
|
||||
info: info-am
|
||||
|
||||
info-am:
|
||||
|
||||
install-data-am:
|
||||
|
||||
install-dvi: install-dvi-am
|
||||
|
||||
install-dvi-am:
|
||||
|
||||
install-exec-am:
|
||||
|
||||
install-html: install-html-am
|
||||
|
||||
install-html-am:
|
||||
|
||||
install-info: install-info-am
|
||||
|
||||
install-info-am:
|
||||
|
||||
install-man:
|
||||
|
||||
install-pdf: install-pdf-am
|
||||
|
||||
install-pdf-am:
|
||||
|
||||
install-ps: install-ps-am
|
||||
|
||||
install-ps-am:
|
||||
|
||||
installcheck-am:
|
||||
|
||||
maintainer-clean: maintainer-clean-am
|
||||
-rm -f ./$(DEPDIR)/after-ccopy-from.Po
|
||||
-rm -f ./$(DEPDIR)/after-ccopy-to.Po
|
||||
-rm -f ./$(DEPDIR)/after-hccopy-from.Po
|
||||
-rm -f ./$(DEPDIR)/after-hccopy-to.Po
|
||||
-rm -f ./$(DEPDIR)/after-rcopy-from.Po
|
||||
-rm -f ./$(DEPDIR)/after-rcopy-to.Po
|
||||
-rm -f ./$(DEPDIR)/allocate.Po
|
||||
-rm -f ./$(DEPDIR)/aset.Po
|
||||
-rm -f ./$(DEPDIR)/bench-cost-postprocess.Po
|
||||
-rm -f ./$(DEPDIR)/bench-exit.Po
|
||||
-rm -f ./$(DEPDIR)/bench-main.Po
|
||||
-rm -f ./$(DEPDIR)/can-do.Po
|
||||
-rm -f ./$(DEPDIR)/caset.Po
|
||||
-rm -f ./$(DEPDIR)/dotens2.Po
|
||||
-rm -f ./$(DEPDIR)/info.Po
|
||||
-rm -f ./$(DEPDIR)/main.Po
|
||||
-rm -f ./$(DEPDIR)/mflops.Po
|
||||
-rm -f ./$(DEPDIR)/mp.Po
|
||||
-rm -f ./$(DEPDIR)/my-getopt.Po
|
||||
-rm -f ./$(DEPDIR)/ovtpvt.Po
|
||||
-rm -f ./$(DEPDIR)/pow2.Po
|
||||
-rm -f ./$(DEPDIR)/problem.Po
|
||||
-rm -f ./$(DEPDIR)/report.Po
|
||||
-rm -f ./$(DEPDIR)/speed.Po
|
||||
-rm -f ./$(DEPDIR)/tensor.Po
|
||||
-rm -f ./$(DEPDIR)/timer.Po
|
||||
-rm -f ./$(DEPDIR)/useropt.Po
|
||||
-rm -f ./$(DEPDIR)/util.Po
|
||||
-rm -f ./$(DEPDIR)/verify-dft.Po
|
||||
-rm -f ./$(DEPDIR)/verify-lib.Po
|
||||
-rm -f ./$(DEPDIR)/verify-r2r.Po
|
||||
-rm -f ./$(DEPDIR)/verify-rdft2.Po
|
||||
-rm -f ./$(DEPDIR)/verify.Po
|
||||
-rm -f ./$(DEPDIR)/zero.Po
|
||||
-rm -f Makefile
|
||||
maintainer-clean-am: distclean-am maintainer-clean-generic
|
||||
|
||||
mostlyclean: mostlyclean-am
|
||||
|
||||
mostlyclean-am: mostlyclean-compile mostlyclean-generic \
|
||||
mostlyclean-libtool
|
||||
|
||||
pdf: pdf-am
|
||||
|
||||
pdf-am:
|
||||
|
||||
ps: ps-am
|
||||
|
||||
ps-am:
|
||||
|
||||
uninstall-am:
|
||||
|
||||
.MAKE: install-am install-strip
|
||||
|
||||
.PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-am clean \
|
||||
clean-generic clean-libtool clean-noinstLIBRARIES \
|
||||
cscopelist-am ctags ctags-am distclean distclean-compile \
|
||||
distclean-generic distclean-libtool distclean-tags distdir dvi \
|
||||
dvi-am html html-am info info-am install install-am \
|
||||
install-data install-data-am install-dvi install-dvi-am \
|
||||
install-exec install-exec-am install-html install-html-am \
|
||||
install-info install-info-am install-man install-pdf \
|
||||
install-pdf-am install-ps install-ps-am install-strip \
|
||||
installcheck installcheck-am installdirs maintainer-clean \
|
||||
maintainer-clean-generic mostlyclean mostlyclean-compile \
|
||||
mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
|
||||
tags tags-am uninstall uninstall-am
|
||||
|
||||
.PRECIOUS: Makefile
|
||||
|
||||
|
||||
# Neither target creates a file of its own name.  Declaring them phony keeps
# a stray file called "benchmark" or "accuracy" from making them look up to
# date and silently skipping the "all" prerequisite.
.PHONY: benchmark accuracy

# "make benchmark": build everything in this directory, then report that
# there is nothing here to run.
benchmark: all
	@echo "nothing to benchmark"

# "make accuracy": build everything in this directory, then report that
# there is nothing here to check (the message used to repeat the
# benchmark text).
accuracy: all
	@echo "nothing to check for accuracy"
|
||||
|
||||
# Tell versions [3.59,3.63) of GNU make to not export all variables.
|
||||
# Otherwise a system limit (for SysV at least) may be exceeded.
|
||||
.NOEXPORT:
|
||||
10
fftw-3.3.10/libbench2/after-ccopy-from.c
Normal file
10
fftw-3.3.10/libbench2/after-ccopy-from.c
Normal file
@@ -0,0 +1,10 @@
|
||||
/* not worth copyrighting */
|
||||
#include "libbench2/bench.h"
|
||||
|
||||
/* default routine, can be overridden by user */
|
||||
void after_problem_ccopy_from(bench_problem *p, bench_real *ri, bench_real *ii)
|
||||
{
|
||||
UNUSED(p);
|
||||
UNUSED(ri);
|
||||
UNUSED(ii);
|
||||
}
|
||||
10
fftw-3.3.10/libbench2/after-ccopy-to.c
Normal file
10
fftw-3.3.10/libbench2/after-ccopy-to.c
Normal file
@@ -0,0 +1,10 @@
|
||||
/* not worth copyrighting */
|
||||
#include "libbench2/bench.h"
|
||||
|
||||
/* default routine, can be overridden by user */
|
||||
void after_problem_ccopy_to(bench_problem *p, bench_real *ro, bench_real *io)
|
||||
{
|
||||
UNUSED(p);
|
||||
UNUSED(ro);
|
||||
UNUSED(io);
|
||||
}
|
||||
10
fftw-3.3.10/libbench2/after-hccopy-from.c
Normal file
10
fftw-3.3.10/libbench2/after-hccopy-from.c
Normal file
@@ -0,0 +1,10 @@
|
||||
/* not worth copyrighting */
|
||||
#include "libbench2/bench.h"
|
||||
|
||||
/* default routine, can be overridden by user */
|
||||
void after_problem_hccopy_from(bench_problem *p, bench_real *ri, bench_real *ii)
|
||||
{
|
||||
UNUSED(p);
|
||||
UNUSED(ri);
|
||||
UNUSED(ii);
|
||||
}
|
||||
10
fftw-3.3.10/libbench2/after-hccopy-to.c
Normal file
10
fftw-3.3.10/libbench2/after-hccopy-to.c
Normal file
@@ -0,0 +1,10 @@
|
||||
/* not worth copyrighting */
|
||||
#include "libbench2/bench.h"
|
||||
|
||||
/* default routine, can be overridden by user */
|
||||
void after_problem_hccopy_to(bench_problem *p, bench_real *ro, bench_real *io)
|
||||
{
|
||||
UNUSED(p);
|
||||
UNUSED(ro);
|
||||
UNUSED(io);
|
||||
}
|
||||
9
fftw-3.3.10/libbench2/after-rcopy-from.c
Normal file
9
fftw-3.3.10/libbench2/after-rcopy-from.c
Normal file
@@ -0,0 +1,9 @@
|
||||
/* not worth copyrighting */
|
||||
#include "libbench2/bench.h"
|
||||
|
||||
/* default routine, can be overridden by user */
|
||||
void after_problem_rcopy_from(bench_problem *p, bench_real *ri)
|
||||
{
|
||||
UNUSED(p);
|
||||
UNUSED(ri);
|
||||
}
|
||||
9
fftw-3.3.10/libbench2/after-rcopy-to.c
Normal file
9
fftw-3.3.10/libbench2/after-rcopy-to.c
Normal file
@@ -0,0 +1,9 @@
|
||||
/* not worth copyrighting */
|
||||
#include "libbench2/bench.h"
|
||||
|
||||
/* default routine, can be overridden by user */
|
||||
void after_problem_rcopy_to(bench_problem *p, bench_real *ro)
|
||||
{
|
||||
UNUSED(p);
|
||||
UNUSED(ro);
|
||||
}
|
||||
110
fftw-3.3.10/libbench2/allocate.c
Normal file
110
fftw-3.3.10/libbench2/allocate.c
Normal file
@@ -0,0 +1,110 @@
|
||||
/* not worth copyrighting */
|
||||
|
||||
|
||||
#include "libbench2/bench.h"
|
||||
|
||||
/*
 * Compute the input bounds (*ilb, *iub) and output bounds (*olb, *oub)
 * of problem P.  The bounds are taken over the tensor obtained by
 * appending p->vecsz to p->sz; that temporary tensor is destroyed
 * before returning.
 */
static void bounds(bench_problem *p, int *ilb, int *iub, int *olb, int *oub)
{
     bench_tensor *whole = tensor_append(p->sz, p->vecsz);

     tensor_ibounds(whole, ilb, iub);
     tensor_obounds(whole, olb, oub);

     tensor_destroy(whole);
}
|
||||
|
||||
/*
|
||||
* Allocate I/O arrays for a problem.
|
||||
*
|
||||
* This is the default routine that can be overridden by the user in
|
||||
* complicated cases.
|
||||
*/
|
||||
void problem_alloc(bench_problem *p)
|
||||
{
|
||||
int ilb, iub, olb, oub;
|
||||
int isz, osz;
|
||||
|
||||
bounds(p, &ilb, &iub, &olb, &oub);
|
||||
isz = iub - ilb;
|
||||
osz = oub - olb;
|
||||
|
||||
if (p->kind == PROBLEM_COMPLEX) {
|
||||
bench_complex *in, *out;
|
||||
|
||||
p->iphyssz = isz;
|
||||
p->inphys = in = (bench_complex *) bench_malloc(isz * sizeof(bench_complex));
|
||||
p->in = in - ilb;
|
||||
|
||||
if (p->in_place) {
|
||||
p->out = p->in;
|
||||
p->outphys = p->inphys;
|
||||
p->ophyssz = p->iphyssz;
|
||||
} else {
|
||||
p->ophyssz = osz;
|
||||
p->outphys = out = (bench_complex *) bench_malloc(osz * sizeof(bench_complex));
|
||||
p->out = out - olb;
|
||||
}
|
||||
} else if (p->kind == PROBLEM_R2R) {
|
||||
bench_real *in, *out;
|
||||
|
||||
p->iphyssz = isz;
|
||||
p->inphys = in = (bench_real *) bench_malloc(isz * sizeof(bench_real));
|
||||
p->in = in - ilb;
|
||||
|
||||
if (p->in_place) {
|
||||
p->out = p->in;
|
||||
p->outphys = p->inphys;
|
||||
p->ophyssz = p->iphyssz;
|
||||
} else {
|
||||
p->ophyssz = osz;
|
||||
p->outphys = out = (bench_real *) bench_malloc(osz * sizeof(bench_real));
|
||||
p->out = out - olb;
|
||||
}
|
||||
} else if (p->kind == PROBLEM_REAL && p->sign < 0) { /* R2HC */
|
||||
bench_real *in;
|
||||
bench_complex *out;
|
||||
|
||||
isz = isz > osz*2 ? isz : osz*2;
|
||||
p->iphyssz = isz;
|
||||
p->inphys = in = (bench_real *) bench_malloc(p->iphyssz * sizeof(bench_real));
|
||||
p->in = in - ilb;
|
||||
|
||||
if (p->in_place) {
|
||||
p->out = p->in;
|
||||
p->outphys = p->inphys;
|
||||
p->ophyssz = p->iphyssz / 2;
|
||||
} else {
|
||||
p->ophyssz = osz;
|
||||
p->outphys = out = (bench_complex *) bench_malloc(osz * sizeof(bench_complex));
|
||||
p->out = out - olb;
|
||||
}
|
||||
} else if (p->kind == PROBLEM_REAL && p->sign > 0) { /* HC2R */
|
||||
bench_real *out;
|
||||
bench_complex *in;
|
||||
|
||||
osz = osz > isz*2 ? osz : isz*2;
|
||||
p->ophyssz = osz;
|
||||
p->outphys = out = (bench_real *) bench_malloc(p->ophyssz * sizeof(bench_real));
|
||||
p->out = out - olb;
|
||||
|
||||
if (p->in_place) {
|
||||
p->in = p->out;
|
||||
p->inphys = p->outphys;
|
||||
p->iphyssz = p->ophyssz / 2;
|
||||
} else {
|
||||
p->iphyssz = isz;
|
||||
p->inphys = in = (bench_complex *) bench_malloc(isz * sizeof(bench_complex));
|
||||
p->in = in - ilb;
|
||||
}
|
||||
} else {
|
||||
BENCH_ASSERT(0); /* TODO */
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Release the buffers and the size tensors of problem P.
 *
 * The output buffer is freed only when it is a buffer of its own; when
 * it is the same pointer as the input buffer, that buffer is freed once.
 */
void problem_free(bench_problem *p)
{
     void *ibuf = p->inphys;
     void *obuf = p->outphys;

     if (obuf != 0 && obuf != ibuf)
          bench_free(obuf);
     if (ibuf != 0)
          bench_free(ibuf);

     tensor_destroy(p->sz);
     tensor_destroy(p->vecsz);
}
|
||||
10
fftw-3.3.10/libbench2/aset.c
Normal file
10
fftw-3.3.10/libbench2/aset.c
Normal file
@@ -0,0 +1,10 @@
|
||||
/* not worth copyrighting */
|
||||
|
||||
#include "libbench2/bench.h"
|
||||
|
||||
/* Store the value X into each of the first N elements of the array A. */
void aset(bench_real *A, int n, bench_real x)
{
     int idx = 0;

     while (idx < n) {
          A[idx] = x;
          ++idx;
     }
}
|
||||
8
fftw-3.3.10/libbench2/bench-cost-postprocess.c
Normal file
8
fftw-3.3.10/libbench2/bench-cost-postprocess.c
Normal file
@@ -0,0 +1,8 @@
|
||||
/* not worth copyrighting */
|
||||
#include "libbench2/bench.h"
|
||||
|
||||
/*
 * Default cost post-processing hook: the cost is handed back unchanged.
 * A benchmark program may supply its own definition to override it.
 */
double bench_cost_postprocess(double cost)
{
     double result = cost;

     return result;
}
|
||||
8
fftw-3.3.10/libbench2/bench-exit.c
Normal file
8
fftw-3.3.10/libbench2/bench-exit.c
Normal file
@@ -0,0 +1,8 @@
|
||||
/* not worth copyrighting */
|
||||
#include "libbench2/bench.h"
|
||||
|
||||
/*
 * Default termination hook: ends the process through exit() with the
 * given status.  A benchmark program may supply its own definition to
 * override it.
 */
void bench_exit(int status)
{
     int code = status;

     exit(code);
}
|
||||
195
fftw-3.3.10/libbench2/bench-main.c
Normal file
195
fftw-3.3.10/libbench2/bench-main.c
Normal file
@@ -0,0 +1,195 @@
|
||||
/*
|
||||
* Copyright (c) 2001 Matteo Frigo
|
||||
* Copyright (c) 2001 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include "libbench2/bench.h"
|
||||
#include "my-getopt.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
int verbose;
|
||||
|
||||
static const struct my_option options[] =
|
||||
{
|
||||
{"accuracy", REQARG, 'a'},
|
||||
{"accuracy-rounds", REQARG, 405},
|
||||
{"impulse-accuracy-rounds", REQARG, 406},
|
||||
{"can-do", REQARG, 'd'},
|
||||
{"help", NOARG, 'h'},
|
||||
{"info", REQARG, 'i'},
|
||||
{"info-all", NOARG, 'I'},
|
||||
{"print-precision", NOARG, 402},
|
||||
{"print-time-min", NOARG, 400},
|
||||
{"random-seed", REQARG, 404},
|
||||
{"report-benchmark", NOARG, 320},
|
||||
{"report-mflops", NOARG, 300},
|
||||
{"report-time", NOARG, 310},
|
||||
{"report-verbose", NOARG, 330},
|
||||
{"speed", REQARG, 's'},
|
||||
{"setup-speed", REQARG, 'S'},
|
||||
{"time-min", REQARG, 't'},
|
||||
{"time-repeat", REQARG, 'r'},
|
||||
{"user-option", REQARG, 'o'},
|
||||
{"verbose", OPTARG, 'v'},
|
||||
{"verify", REQARG, 'y'},
|
||||
{"verify-rounds", REQARG, 401},
|
||||
{"verify-tolerance", REQARG, 403},
|
||||
{0, NOARG, 0}
|
||||
};
|
||||
|
||||
int bench_main(int argc, char *argv[])
|
||||
{
|
||||
double tmin = 0.0;
|
||||
double tol;
|
||||
int repeat = 0;
|
||||
int rounds = 10;
|
||||
int iarounds = 0;
|
||||
int arounds = 1; /* this is too low for precise results */
|
||||
int c;
|
||||
|
||||
report = report_verbose; /* default */
|
||||
verbose = 0;
|
||||
|
||||
tol = SINGLE_PRECISION ? 1.0e-3 : (QUAD_PRECISION ? 1e-29 : 1.0e-10);
|
||||
|
||||
main_init(&argc, &argv);
|
||||
|
||||
bench_srand(1);
|
||||
|
||||
while ((c = my_getopt (argc, argv, options)) != -1) {
|
||||
switch (c) {
|
||||
case 't' :
|
||||
tmin = strtod(my_optarg, 0);
|
||||
break;
|
||||
case 'r':
|
||||
repeat = atoi(my_optarg);
|
||||
break;
|
||||
case 's':
|
||||
timer_init(tmin, repeat);
|
||||
speed(my_optarg, 0);
|
||||
break;
|
||||
case 'S':
|
||||
timer_init(tmin, repeat);
|
||||
speed(my_optarg, 1);
|
||||
break;
|
||||
case 'd':
|
||||
report_can_do(my_optarg);
|
||||
break;
|
||||
case 'o':
|
||||
useropt(my_optarg);
|
||||
break;
|
||||
case 'v':
|
||||
if (verbose >= 0) { /* verbose < 0 disables output */
|
||||
if (my_optarg)
|
||||
verbose = atoi(my_optarg);
|
||||
else
|
||||
++verbose;
|
||||
}
|
||||
break;
|
||||
case 'y':
|
||||
verify(my_optarg, rounds, tol);
|
||||
break;
|
||||
case 'a':
|
||||
accuracy(my_optarg, arounds, iarounds);
|
||||
break;
|
||||
case 'i':
|
||||
report_info(my_optarg);
|
||||
break;
|
||||
case 'I':
|
||||
report_info_all();
|
||||
break;
|
||||
case 'h':
|
||||
if (verbose >= 0) my_usage(argv[0], options);
|
||||
break;
|
||||
|
||||
case 300: /* --report-mflops */
|
||||
report = report_mflops;
|
||||
break;
|
||||
|
||||
case 310: /* --report-time */
|
||||
report = report_time;
|
||||
break;
|
||||
|
||||
case 320: /* --report-benchmark */
|
||||
report = report_benchmark;
|
||||
break;
|
||||
|
||||
case 330: /* --report-verbose */
|
||||
report = report_verbose;
|
||||
break;
|
||||
|
||||
case 400: /* --print-time-min */
|
||||
timer_init(tmin, repeat);
|
||||
ovtpvt("%g\n", time_min);
|
||||
break;
|
||||
|
||||
case 401: /* --verify-rounds */
|
||||
rounds = atoi(my_optarg);
|
||||
break;
|
||||
|
||||
case 402: /* --print-precision */
|
||||
if (SINGLE_PRECISION)
|
||||
ovtpvt("single\n");
|
||||
else if (QUAD_PRECISION)
|
||||
ovtpvt("quad\n");
|
||||
else if (LDOUBLE_PRECISION)
|
||||
ovtpvt("long-double\n");
|
||||
else if (DOUBLE_PRECISION)
|
||||
ovtpvt("double\n");
|
||||
else
|
||||
ovtpvt("unknown %d\n", sizeof(bench_real));
|
||||
break;
|
||||
|
||||
case 403: /* --verify-tolerance */
|
||||
tol = strtod(my_optarg, 0);
|
||||
break;
|
||||
|
||||
case 404: /* --random-seed */
|
||||
bench_srand(atoi(my_optarg));
|
||||
break;
|
||||
|
||||
case 405: /* --accuracy-rounds */
|
||||
arounds = atoi(my_optarg);
|
||||
break;
|
||||
|
||||
case 406: /* --impulse-accuracy-rounds */
|
||||
iarounds = atoi(my_optarg);
|
||||
break;
|
||||
|
||||
case '?':
|
||||
/* my_getopt() already printed an error message. */
|
||||
cleanup();
|
||||
return 1;
|
||||
|
||||
default:
|
||||
abort ();
|
||||
}
|
||||
}
|
||||
|
||||
/* assume that any remaining arguments are problems to be
|
||||
benchmarked */
|
||||
while (my_optind < argc) {
|
||||
timer_init(tmin, repeat);
|
||||
speed(argv[my_optind++], 0);
|
||||
}
|
||||
|
||||
cleanup();
|
||||
return 0;
|
||||
}
|
||||
276
fftw-3.3.10/libbench2/bench-user.h
Normal file
276
fftw-3.3.10/libbench2/bench-user.h
Normal file
@@ -0,0 +1,276 @@
|
||||
/*
|
||||
* Copyright (c) 2001 Matteo Frigo
|
||||
* Copyright (c) 2001 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef __BENCH_USER_H__
|
||||
#define __BENCH_USER_H__
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif /* __cplusplus */
|
||||
|
||||
/* benchmark program definitions for user code */
|
||||
#include "config.h"
|
||||
#include <limits.h>
|
||||
|
||||
#if HAVE_STDDEF_H
|
||||
#include <stddef.h>
|
||||
#endif
|
||||
|
||||
#if HAVE_STDLIB_H
|
||||
#include <stdlib.h>
|
||||
#endif
|
||||
|
||||
#if defined(BENCHFFT_SINGLE)
|
||||
typedef float bench_real;
|
||||
#elif defined(BENCHFFT_LDOUBLE)
|
||||
typedef long double bench_real;
|
||||
#elif defined(BENCHFFT_QUAD)
|
||||
typedef __float128 bench_real;
|
||||
#else
|
||||
typedef double bench_real;
|
||||
#endif
|
||||
|
||||
typedef bench_real bench_complex[2];
|
||||
|
||||
#define c_re(c) ((c)[0])
|
||||
#define c_im(c) ((c)[1])
|
||||
|
||||
#undef DOUBLE_PRECISION
|
||||
#define DOUBLE_PRECISION (sizeof(bench_real) == sizeof(double))
|
||||
#undef SINGLE_PRECISION
|
||||
#define SINGLE_PRECISION (!DOUBLE_PRECISION && sizeof(bench_real) == sizeof(float))
|
||||
#undef LDOUBLE_PRECISION
|
||||
#define LDOUBLE_PRECISION (!DOUBLE_PRECISION && sizeof(bench_real) == sizeof(long double))
|
||||
|
||||
#undef QUAD_PRECISION
|
||||
#ifdef BENCHFFT_QUAD
|
||||
#define QUAD_PRECISION (!LDOUBLE_PRECISION && sizeof(bench_real) == sizeof(__float128))
|
||||
#else
|
||||
#define QUAD_PRECISION 0
|
||||
#endif
|
||||
|
||||
typedef enum { PROBLEM_COMPLEX, PROBLEM_REAL, PROBLEM_R2R } problem_kind_t;
|
||||
|
||||
typedef enum {
|
||||
R2R_R2HC, R2R_HC2R, R2R_DHT,
|
||||
R2R_REDFT00, R2R_REDFT01, R2R_REDFT10, R2R_REDFT11,
|
||||
R2R_RODFT00, R2R_RODFT01, R2R_RODFT10, R2R_RODFT11
|
||||
} r2r_kind_t;
|
||||
|
||||
typedef struct {
|
||||
int n;
|
||||
int is; /* input stride */
|
||||
int os; /* output stride */
|
||||
} bench_iodim;
|
||||
|
||||
typedef struct {
|
||||
int rnk;
|
||||
bench_iodim *dims;
|
||||
} bench_tensor;
|
||||
|
||||
bench_tensor *mktensor(int rnk);
|
||||
void tensor_destroy(bench_tensor *sz);
|
||||
size_t tensor_sz(const bench_tensor *sz);
|
||||
bench_tensor *tensor_compress(const bench_tensor *sz);
|
||||
int tensor_unitstridep(bench_tensor *t);
|
||||
int tensor_rowmajorp(bench_tensor *t);
|
||||
int tensor_real_rowmajorp(bench_tensor *t, int sign, int in_place);
|
||||
bench_tensor *tensor_append(const bench_tensor *a, const bench_tensor *b);
|
||||
bench_tensor *tensor_copy(const bench_tensor *sz);
|
||||
bench_tensor *tensor_copy_sub(const bench_tensor *sz, int start_dim, int rnk);
|
||||
bench_tensor *tensor_copy_swapio(const bench_tensor *sz);
|
||||
void tensor_ibounds(bench_tensor *t, int *lbp, int *ubp);
|
||||
void tensor_obounds(bench_tensor *t, int *lbp, int *ubp);
|
||||
|
||||
/*
|
||||
Definition of rank -infinity.
|
||||
This definition has the property that if you want rank 0 or 1,
|
||||
you can simply test for rank <= 1. This is a common case.
|
||||
|
||||
A tensor of rank -infinity has size 0.
|
||||
*/
|
||||
#define BENCH_RNK_MINFTY INT_MAX
|
||||
#define BENCH_FINITE_RNK(rnk) ((rnk) != BENCH_RNK_MINFTY)
|
||||
|
||||
typedef struct {
|
||||
problem_kind_t kind;
|
||||
r2r_kind_t *k;
|
||||
bench_tensor *sz;
|
||||
bench_tensor *vecsz;
|
||||
int sign;
|
||||
int in_place;
|
||||
int destroy_input;
|
||||
int split;
|
||||
void *in, *out;
|
||||
void *inphys, *outphys;
|
||||
int iphyssz, ophyssz;
|
||||
char *pstring;
|
||||
void *userinfo; /* user can store whatever */
|
||||
int scrambled_in, scrambled_out; /* hack for MPI */
|
||||
|
||||
/* internal hack so that we can use verifier in FFTW test program */
|
||||
void *ini, *outi; /* if nonzero, point to imag. parts for dft */
|
||||
|
||||
/* another internal hack to avoid passing around too many parameters */
|
||||
double setup_time;
|
||||
} bench_problem;
|
||||
|
||||
extern int verbose;
|
||||
|
||||
extern int no_speed_allocation;
|
||||
|
||||
extern int always_pad_real;
|
||||
|
||||
#define LIBBENCH_TIMER 0
|
||||
#define USER_TIMER 1
|
||||
#define BENCH_NTIMERS 2
|
||||
extern void timer_start(int which_timer);
|
||||
extern double timer_stop(int which_timer);
|
||||
|
||||
extern int can_do(bench_problem *p);
|
||||
extern void setup(bench_problem *p);
|
||||
extern void doit(int iter, bench_problem *p);
|
||||
extern void done(bench_problem *p);
|
||||
extern void main_init(int *argc, char ***argv);
|
||||
extern void cleanup(void);
|
||||
extern void verify(const char *param, int rounds, double tol);
|
||||
extern void useropt(const char *arg);
|
||||
|
||||
extern void verify_problem(bench_problem *p, int rounds, double tol);
|
||||
|
||||
extern void problem_alloc(bench_problem *p);
|
||||
extern void problem_free(bench_problem *p);
|
||||
extern void problem_zero(bench_problem *p);
|
||||
extern void problem_destroy(bench_problem *p);
|
||||
|
||||
extern int power_of_two(int n);
|
||||
extern int log_2(int n);
|
||||
|
||||
|
||||
#define CASSIGN(out, in) (c_re(out) = c_re(in), c_im(out) = c_im(in))
|
||||
|
||||
bench_tensor *verify_pack(const bench_tensor *sz, int s);
|
||||
|
||||
typedef struct {
|
||||
double l;
|
||||
double i;
|
||||
double s;
|
||||
} errors;
|
||||
|
||||
void verify_dft(bench_problem *p, int rounds, double tol, errors *e);
|
||||
void verify_rdft2(bench_problem *p, int rounds, double tol, errors *e);
|
||||
void verify_r2r(bench_problem *p, int rounds, double tol, errors *e);
|
||||
|
||||
/**************************************************************/
|
||||
/* routines to override */
|
||||
|
||||
extern void after_problem_ccopy_from(bench_problem *p, bench_real *ri, bench_real *ii);
|
||||
extern void after_problem_ccopy_to(bench_problem *p, bench_real *ro, bench_real *io);
|
||||
extern void after_problem_hccopy_from(bench_problem *p, bench_real *ri, bench_real *ii);
|
||||
extern void after_problem_hccopy_to(bench_problem *p, bench_real *ro, bench_real *io);
|
||||
extern void after_problem_rcopy_from(bench_problem *p, bench_real *ri);
|
||||
extern void after_problem_rcopy_to(bench_problem *p, bench_real *ro);
|
||||
extern void bench_exit(int status);
|
||||
extern double bench_cost_postprocess(double cost);
|
||||
|
||||
/**************************************************************
|
||||
* malloc
|
||||
**************************************************************/
|
||||
extern void *bench_malloc(size_t size);
|
||||
extern void bench_free(void *ptr);
|
||||
extern void bench_free0(void *ptr);
|
||||
|
||||
/**************************************************************
|
||||
* alloca
|
||||
**************************************************************/
|
||||
#ifdef HAVE_ALLOCA_H
|
||||
#include <alloca.h>
|
||||
#endif
|
||||
|
||||
/**************************************************************
|
||||
* assert
|
||||
**************************************************************/
|
||||
extern void bench_assertion_failed(const char *s, int line, const char *file);
|
||||
#define BENCH_ASSERT(ex) \
|
||||
(void)((ex) || (bench_assertion_failed(#ex, __LINE__, __FILE__), 0))
|
||||
|
||||
/* Mark X as deliberately unused.  The argument and the whole expansion
   are parenthesized so that any expression can be passed safely. */
#define UNUSED(x) ((void)(x))
|
||||
|
||||
/***************************************
|
||||
* Documentation strings
|
||||
***************************************/
|
||||
struct bench_doc {
|
||||
const char *key;
|
||||
const char *val;
|
||||
const char *(*f)(void);
|
||||
};
|
||||
|
||||
extern struct bench_doc bench_doc[];
|
||||
|
||||
#ifdef CC
|
||||
#define CC_DOC BENCH_DOC("cc", CC)
|
||||
#elif defined(BENCH_CC)
|
||||
#define CC_DOC BENCH_DOC("cc", BENCH_CC)
|
||||
#else
|
||||
#define CC_DOC /* none */
|
||||
#endif
|
||||
|
||||
#ifdef CXX
|
||||
#define CXX_DOC BENCH_DOC("cxx", CXX)
|
||||
#elif defined(BENCH_CXX)
|
||||
#define CXX_DOC BENCH_DOC("cxx", BENCH_CXX)
|
||||
#else
|
||||
#define CXX_DOC /* none */
|
||||
#endif
|
||||
|
||||
#ifdef F77
|
||||
#define F77_DOC BENCH_DOC("f77", F77)
|
||||
#elif defined(BENCH_F77)
|
||||
#define F77_DOC BENCH_DOC("f77", BENCH_F77)
|
||||
#else
|
||||
#define F77_DOC /* none */
|
||||
#endif
|
||||
|
||||
#ifdef F90
|
||||
#define F90_DOC BENCH_DOC("f90", F90)
|
||||
#elif defined(BENCH_F90)
|
||||
#define F90_DOC BENCH_DOC("f90", BENCH_F90)
|
||||
#else
|
||||
#define F90_DOC /* none */
|
||||
#endif
|
||||
|
||||
#define BEGIN_BENCH_DOC \
|
||||
struct bench_doc bench_doc[] = { \
|
||||
CC_DOC \
|
||||
CXX_DOC \
|
||||
F77_DOC \
|
||||
F90_DOC
|
||||
|
||||
#define BENCH_DOC(key, val) { key, val, 0 },
|
||||
#define BENCH_DOCF(key, f) { key, 0, f },
|
||||
|
||||
#define END_BENCH_DOC \
|
||||
{0, 0, 0}};
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif /* __cplusplus */
|
||||
|
||||
#endif /* __BENCH_USER_H__ */
|
||||
63
fftw-3.3.10/libbench2/bench.h
Normal file
63
fftw-3.3.10/libbench2/bench.h
Normal file
@@ -0,0 +1,63 @@
|
||||
/*
|
||||
* Copyright (c) 2001 Matteo Frigo
|
||||
* Copyright (c) 2001 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
/* benchmark program definitions */
|
||||
#include "libbench2/bench-user.h"
|
||||
|
||||
extern double time_min;
|
||||
extern int time_repeat;
|
||||
|
||||
extern void timer_init(double tmin, int repeat);
|
||||
|
||||
/* report functions */
|
||||
extern void (*report)(const bench_problem *p, double *t, int st);
|
||||
|
||||
void report_mflops(const bench_problem *p, double *t, int st);
|
||||
void report_time(const bench_problem *p, double *t, int st);
|
||||
void report_benchmark(const bench_problem *p, double *t, int st);
|
||||
void report_verbose(const bench_problem *p, double *t, int st);
|
||||
|
||||
void report_can_do(const char *param);
|
||||
void report_info(const char *param);
|
||||
void report_info_all(void);
|
||||
|
||||
extern int aligned_main(int argc, char *argv[]);
|
||||
extern int bench_main(int argc, char *argv[]);
|
||||
|
||||
extern void speed(const char *param, int setup_only);
|
||||
extern void accuracy(const char *param, int rounds, int impulse_rounds);
|
||||
|
||||
extern double mflops(const bench_problem *p, double t);
|
||||
|
||||
extern double bench_drand(void);
|
||||
extern void bench_srand(int seed);
|
||||
|
||||
extern bench_problem *problem_parse(const char *desc);
|
||||
|
||||
extern void ovtpvt(const char *format, ...);
|
||||
extern void ovtpvt_err(const char *format, ...);
|
||||
|
||||
extern void fftaccuracy(int n, bench_complex *a, bench_complex *ffta,
|
||||
int sign, double err[6]);
|
||||
extern void fftaccuracy_done(void);
|
||||
|
||||
extern void caset(bench_complex *A, int n, bench_complex x);
|
||||
extern void aset(bench_real *A, int n, bench_real x);
|
||||
31
fftw-3.3.10/libbench2/can-do.c
Normal file
31
fftw-3.3.10/libbench2/can-do.c
Normal file
@@ -0,0 +1,31 @@
|
||||
/*
|
||||
* Copyright (c) 2001 Matteo Frigo
|
||||
* Copyright (c) 2001 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include "libbench2/bench.h"
|
||||
#include <stdio.h>
|
||||
|
||||
void report_can_do(const char *param)
|
||||
{
|
||||
bench_problem *p;
|
||||
p = problem_parse(param);
|
||||
ovtpvt("#%c\n", can_do(p) ? 't' : 'f');
|
||||
problem_destroy(p);
|
||||
}
|
||||
12
fftw-3.3.10/libbench2/caset.c
Normal file
12
fftw-3.3.10/libbench2/caset.c
Normal file
@@ -0,0 +1,12 @@
|
||||
/* not worth copyrighting */
|
||||
|
||||
#include "libbench2/bench.h"
|
||||
|
||||
/* Store the complex value X into each of the first N elements of A. */
void caset(bench_complex *A, int n, bench_complex x)
{
     int idx = 0;

     while (idx < n) {
          bench_real *elem = A[idx];

          c_re(elem) = c_re(x);
          c_im(elem) = c_im(x);
          ++idx;
     }
}
|
||||
54
fftw-3.3.10/libbench2/dotens2.c
Normal file
54
fftw-3.3.10/libbench2/dotens2.c
Normal file
@@ -0,0 +1,54 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include "verify.h"
|
||||
|
||||
static void recur(int rnk, const bench_iodim *dims0, const bench_iodim *dims1,
|
||||
dotens2_closure *k,
|
||||
int indx0, int ondx0, int indx1, int ondx1)
|
||||
{
|
||||
if (rnk == 0)
|
||||
k->apply(k, indx0, ondx0, indx1, ondx1);
|
||||
else {
|
||||
int i, n = dims0[0].n;
|
||||
int is0 = dims0[0].is;
|
||||
int os0 = dims0[0].os;
|
||||
int is1 = dims1[0].is;
|
||||
int os1 = dims1[0].os;
|
||||
|
||||
BENCH_ASSERT(n == dims1[0].n);
|
||||
|
||||
for (i = 0; i < n; ++i) {
|
||||
recur(rnk - 1, dims0 + 1, dims1 + 1, k,
|
||||
indx0, ondx0, indx1, ondx1);
|
||||
indx0 += is0; ondx0 += os0;
|
||||
indx1 += is1; ondx1 += os1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Apply the closure K to every pair of corresponding elements of the
 * tensors SZ0 and SZ1, which must have the same rank.  Nothing is done
 * for tensors of rank BENCH_RNK_MINFTY.
 */
void bench_dotens2(const bench_tensor *sz0, const bench_tensor *sz1, dotens2_closure *k)
{
     int rank = sz0->rnk;

     BENCH_ASSERT(rank == sz1->rnk);

     if (rank != BENCH_RNK_MINFTY)
          recur(rank, sz0->dims, sz1->dims, k, 0, 0, 0, 0);
}
|
||||
58
fftw-3.3.10/libbench2/info.c
Normal file
58
fftw-3.3.10/libbench2/info.c
Normal file
@@ -0,0 +1,58 @@
|
||||
/*
|
||||
* Copyright (c) 2001 Matteo Frigo
|
||||
* Copyright (c) 2001 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include "libbench2/bench.h"
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
void report_info(const char *param)
|
||||
{
|
||||
struct bench_doc *p;
|
||||
|
||||
for (p = bench_doc; p->key; ++p) {
|
||||
if (!strcmp(param, p->key)) {
|
||||
if (!p->val)
|
||||
p->val = p->f();
|
||||
|
||||
ovtpvt("%s\n", p->val);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void report_info_all(void)
|
||||
{
|
||||
struct bench_doc *p;
|
||||
|
||||
/*
|
||||
* TODO: escape quotes? The format is not unambigously
|
||||
* parseable if the info string contains double quotes.
|
||||
*/
|
||||
for (p = bench_doc; p->key; ++p) {
|
||||
if (!p->val)
|
||||
p->val = p->f();
|
||||
ovtpvt("(%s \"%s\")\n", p->key, p->val);
|
||||
}
|
||||
ovtpvt("(benchmark-precision \"%s\")\n",
|
||||
SINGLE_PRECISION ? "single" :
|
||||
(LDOUBLE_PRECISION ? "long-double" :
|
||||
(QUAD_PRECISION ? "quad" : "double")));
|
||||
}
|
||||
|
||||
39
fftw-3.3.10/libbench2/main.c
Normal file
39
fftw-3.3.10/libbench2/main.c
Normal file
@@ -0,0 +1,39 @@
|
||||
/*
|
||||
* Copyright (c) 2001 Matteo Frigo
|
||||
* Copyright (c) 2001 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include "libbench2/bench.h"
|
||||
|
||||
/*
 * On some systems we are required to define a dummy main-like routine
 * (called "MAIN__" or something similar) in order to link a C main()
 * with the Fortran libraries.  This is detected by autoconf; see the
 * autoconf 2.52 or later manual.
 */
#if defined(F77_DUMMY_MAIN)
#  if defined(__cplusplus)
extern "C"
#  endif
int F77_DUMMY_MAIN() { return 1; }
#endif
|
||||
|
||||
/*
 * Program entry point.  Kept in a separate file so that the user can
 * override it; it simply forwards the command line to bench_main() and
 * returns its result.
 */
int main(int argc, char *argv[])
{
    const int status = bench_main(argc, argv);

    return status;
}
|
||||
32
fftw-3.3.10/libbench2/mflops.c
Normal file
32
fftw-3.3.10/libbench2/mflops.c
Normal file
@@ -0,0 +1,32 @@
|
||||
/* not worth copyrighting */
|
||||
|
||||
#include "libbench2/bench.h"
|
||||
#include <math.h>
|
||||
|
||||
double mflops(const bench_problem *p, double t)
|
||||
{
|
||||
size_t size = tensor_sz(p->sz);
|
||||
size_t vsize = tensor_sz(p->vecsz);
|
||||
|
||||
if (size <= 1) /* a copy: just return reals copied / time */
|
||||
switch (p->kind) {
|
||||
case PROBLEM_COMPLEX:
|
||||
return (2.0 * size * vsize / (t * 1.0e6));
|
||||
case PROBLEM_REAL:
|
||||
case PROBLEM_R2R:
|
||||
return (1.0 * size * vsize / (t * 1.0e6));
|
||||
}
|
||||
|
||||
switch (p->kind) {
|
||||
case PROBLEM_COMPLEX:
|
||||
return (5.0 * size * vsize * log((double)size) /
|
||||
(log(2.0) * t * 1.0e6));
|
||||
case PROBLEM_REAL:
|
||||
case PROBLEM_R2R:
|
||||
return (2.5 * vsize * size * log((double) size) /
|
||||
(log(2.0) * t * 1.0e6));
|
||||
}
|
||||
BENCH_ASSERT(0 /* can't happen */);
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
641
fftw-3.3.10/libbench2/mp.c
Normal file
641
fftw-3.3.10/libbench2/mp.c
Normal file
@@ -0,0 +1,641 @@
|
||||
#include "config.h"
|
||||
#include "libbench2/bench.h"
|
||||
#include <math.h>
|
||||
|
||||
/* --- basic types ------------------------------------------------------ */
#define DG unsigned short       /* one base-RADIX digit */
#define ACC unsigned long       /* wider type for digit sums, products, carries */
#define REAL bench_real
#define BITS_IN_REAL 53 /* mantissa */

/* --- radix arithmetic ------------------------------------------------- */
#define SHFT 16
#define RADIX 65536L
#define IRADIX (1.0 / RADIX)
/* low digit and carry of an accumulator value */
#define LO(x) ((x) & (RADIX - 1))
#define HI(x) ((x) >> SHFT)
/* carry/borrow of an accumulator value that may have wrapped below zero */
#define HI_SIGNED(x) \
   ((((x) + (ACC)(RADIX >> 1) * RADIX) >> SHFT) - (RADIX >> 1))
/* exponent stored in the representation of zero */
#define ZEROEXP (-32768)

/* number of digits kept per number */
#define LEN 10

/*
 * Multiprecision number: a sign, a base-RADIX exponent and LEN digits,
 * with d[LEN - 1] the most significant one.  Declared as a one-element
 * array type so that values of type N are passed by reference.
 */
typedef struct {
    short sign;
    short expt;
    DG d[LEN];
} N[1];

/* field shorthands for operands conventionally named a, b and c */
#define EXA a->expt
#define EXB b->expt
#define EXC c->expt

#define AD a->d
#define BD b->d

#define SGNA a->sign
#define SGNB b->sign

/* the number zero: sign 1, exponent ZEROEXP, all digits 0 */
static const N zero = {{ 1, ZEROEXP, {0} }};
|
||||
|
||||
/* Copy the number a into b (b = a). */
static void cpy(const N a, N b)
{
    b[0] = a[0];
}
|
||||
|
||||
/*
 * Convert the floating-point value x into the multiprecision number a.
 *
 * x == 0 yields a copy of `zero`.  Otherwise the sign is recorded, |x|
 * is scaled by powers of RADIX into [IRADIX, 1) while the exponent is
 * tracked, and digits are then peeled off from the most significant end
 * until x is exhausted or all LEN digits are filled.
 */
static void fromreal(REAL x, N a)
{
    int i, e = 0;

    cpy(zero, a);
    if (x == 0.0)
        return;

    if (x >= 0) {
        a->sign = 1;
    } else {
        a->sign = -1;
        x = -x;
    }

    /* normalize so that IRADIX <= x < 1 */
    for (; x >= 1.0; ++e)
        x *= IRADIX;
    for (; x < IRADIX; --e)
        x *= RADIX;
    a->expt = e;

    i = LEN - 1;
    while (i >= 0 && x != 0.0) {
        REAL whole;

        x *= RADIX;
        whole = (REAL) ((int) x);   /* integer part is the next digit */
        a->d[i] = (DG)whole;
        x -= whole;
        --i;
    }
}
|
||||
|
||||
/*
 * Store the small integer x in a: the sign goes into a->sign, the
 * exponent is set to 1, and |x| becomes the single most significant
 * digit (all other digits are zero).
 */
static void fromshort(int x, N a)
{
    cpy(zero, a);

    a->sign = (x < 0) ? -1 : 1;
    if (x < 0)
        x = -x;

    a->expt = 1;
    a->d[LEN - 1] = x;
}
|
||||
|
||||
/*
 * Normalize the l raw digits d[0..l-1] (d[l-1] most significant), with
 * exponent e and sign s, into the number a.
 *
 * Leading zero digits are dropped, decrementing the exponent once per
 * dropped digit.  If every digit is zero, a becomes `zero`.  Otherwise
 * the LEN most significant remaining digits are stored; excess low
 * digits are discarded and missing low digits are zero-filled.
 */
static void pack(DG *d, int e, int s, int l, N a)
{
    int top = l - 1;
    int j;

    while (top >= 0 && d[top] == 0) {
        --top;
        --e;
    }

    if (top < 0) {
        /* number is zero */
        cpy(zero, a);
        return;
    }

    a->expt = e;
    a->sign = s;

    for (j = LEN - 1; j >= 0; --j, --top)
        a->d[j] = (top >= 0) ? d[top] : 0;
}
|
||||
|
||||
|
||||
/* compare absolute values */
|
||||
static int abscmp(const N a, const N b)
|
||||
{
|
||||
int i;
|
||||
if (EXA > EXB) return 1;
|
||||
if (EXA < EXB) return -1;
|
||||
for (i = LEN - 1; i >= 0; --i) {
|
||||
if (AD[i] > BD[i])
|
||||
return 1;
|
||||
if (AD[i] < BD[i])
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int eq(const N a, const N b)
|
||||
{
|
||||
return (SGNA == SGNB) && (abscmp(a, b) == 0);
|
||||
}
|
||||
|
||||
/* add magnitudes, for |a| >= |b| */
|
||||
static void addmag0(int s, const N a, const N b, N c)
|
||||
{
|
||||
int ia, ib;
|
||||
ACC r = 0;
|
||||
DG d[LEN + 1];
|
||||
|
||||
for (ia = 0, ib = EXA - EXB; ib < LEN; ++ia, ++ib) {
|
||||
r += (ACC)AD[ia] + (ACC)BD[ib];
|
||||
d[ia] = LO(r);
|
||||
r = HI(r);
|
||||
}
|
||||
for (; ia < LEN; ++ia) {
|
||||
r += (ACC)AD[ia];
|
||||
d[ia] = LO(r);
|
||||
r = HI(r);
|
||||
}
|
||||
d[ia] = LO(r);
|
||||
pack(d, EXA + 1, s * SGNA, LEN + 1, c);
|
||||
}
|
||||
|
||||
/*
 * Add the magnitudes of a and b into c, passing the operand of larger
 * magnitude first to addmag0().  When the operands are swapped, the
 * result sign is s times the sign of b; otherwise it is the sign of a.
 */
static void addmag(int s, const N a, const N b, N c)
{
    const int a_is_larger = (abscmp(a, b) > 0);

    if (a_is_larger)
        addmag0(1, a, b, c);
    else
        addmag0(s, b, a, c);
}
|
||||
|
||||
/* subtract magnitudes, for |a| >= |b| */
|
||||
static void submag0(int s, const N a, const N b, N c)
|
||||
{
|
||||
int ia, ib;
|
||||
ACC r = 0;
|
||||
DG d[LEN];
|
||||
|
||||
for (ia = 0, ib = EXA - EXB; ib < LEN; ++ia, ++ib) {
|
||||
r += (ACC)AD[ia] - (ACC)BD[ib];
|
||||
d[ia] = LO(r);
|
||||
r = HI_SIGNED(r);
|
||||
}
|
||||
for (; ia < LEN; ++ia) {
|
||||
r += (ACC)AD[ia];
|
||||
d[ia] = LO(r);
|
||||
r = HI_SIGNED(r);
|
||||
}
|
||||
|
||||
pack(d, EXA, s * SGNA, LEN, c);
|
||||
}
|
||||
|
||||
/*
 * Subtract the smaller magnitude from the larger one into c, passing
 * the operand of larger magnitude first to submag0().  When the operands
 * are swapped, the result sign is s times the sign of b; otherwise it is
 * the sign of a.
 */
static void submag(int s, const N a, const N b, N c)
{
    const int a_is_larger = (abscmp(a, b) > 0);

    if (a_is_larger)
        submag0(1, a, b, c);
    else
        submag0(s, b, a, c);
}
|
||||
|
||||
/* c = a + b */
|
||||
static void add(const N a, const N b, N c)
|
||||
{
|
||||
if (SGNA == SGNB) addmag(1, a, b, c); else submag(1, a, b, c);
|
||||
}
|
||||
|
||||
/*
 * c = a - b.  Operands of equal sign have their magnitudes subtracted;
 * operands of opposite sign have them added.  The -1 passed down flips
 * the result sign when b ends up as the leading operand.
 */
static void sub(const N a, const N b, N c)
{
    if (a->sign != b->sign)
        addmag(-1, a, b, c);
    else
        submag(-1, a, b, c);
}
|
||||
|
||||
/*
 * c = a * b, by schoolbook multiplication into 2 * LEN raw digits that
 * are then normalized by pack().  The product is fully accumulated in a
 * local buffer before c is written, so c may be the same object as a
 * or b.
 */
static void mul(const N a, const N b, N c)
{
    DG prod[2 * LEN];
    int i, j;

    for (i = 0; i < 2 * LEN; ++i)
        prod[i] = 0;

    for (i = 0; i < LEN; ++i) {
        const ACC ai = a->d[i];
        ACC carry = 0;

        if (!ai)
            continue;   /* a zero digit contributes nothing */

        for (j = 0; j < LEN; ++j) {
            carry += ai * (ACC)b->d[j] + (ACC)prod[i + j];
            prod[i + j] = LO(carry);
            carry = HI(carry);
        }
        prod[i + LEN] = LO(carry);
    }

    pack(prod, a->expt + b->expt, a->sign * b->sign, 2 * LEN, c);
}
|
||||
|
||||
static REAL toreal(const N a)
|
||||
{
|
||||
REAL h, l, f;
|
||||
int i, bits;
|
||||
ACC r;
|
||||
DG sticky;
|
||||
|
||||
if (EXA != ZEROEXP) {
|
||||
f = IRADIX;
|
||||
i = LEN;
|
||||
|
||||
bits = 0;
|
||||
h = (r = AD[--i]) * f; f *= IRADIX;
|
||||
for (bits = 0; r > 0; ++bits)
|
||||
r >>= 1;
|
||||
|
||||
/* first digit */
|
||||
while (bits + SHFT <= BITS_IN_REAL) {
|
||||
h += AD[--i] * f; f *= IRADIX; bits += SHFT;
|
||||
}
|
||||
|
||||
/* guard digit (leave one bit for sticky bit, hence `<' instead
|
||||
of `<=') */
|
||||
bits = 0; l = 0.0;
|
||||
while (bits + SHFT < BITS_IN_REAL) {
|
||||
l += AD[--i] * f; f *= IRADIX; bits += SHFT;
|
||||
}
|
||||
|
||||
/* sticky bit */
|
||||
sticky = 0;
|
||||
while (i > 0)
|
||||
sticky |= AD[--i];
|
||||
|
||||
if (sticky)
|
||||
l += (RADIX / 2) * f;
|
||||
|
||||
h += l;
|
||||
|
||||
for (i = 0; i < EXA; ++i) h *= (REAL)RADIX;
|
||||
for (i = 0; i > EXA; --i) h *= IRADIX;
|
||||
if (SGNA == -1) h = -h;
|
||||
return h;
|
||||
} else {
|
||||
return 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
/* Negate a in place by flipping its sign field. */
static void neg(N a)
{
    a->sign = -(a->sign);
}
|
||||
|
||||
/*
 * x = 1 / a, by Newton iteration.
 *
 * The initial guess is the floating-point reciprocal of toreal(a).
 * Each step computes z = 2 - a * x and replaces x by x * z; the loop
 * stops as soon as z compares equal to one.
 */
static void inv(const N a, N x)
{
    N ax, corr, one, two;

    fromshort(1, one);
    fromshort(2, two);
    fromreal(1.0 / toreal(a), x); /* initial guess */

    /* Newton */
    mul(a, x, ax);
    sub(two, ax, corr);
    while (!eq(one, corr)) {
        mul(x, corr, x);
        mul(a, x, ax);
        sub(two, ax, corr);
    }
}
|
||||
|
||||
|
||||
/* 2 pi */
|
||||
static const N n2pi = {{
|
||||
1, 1,
|
||||
{18450, 59017, 1760, 5212, 9779, 4518, 2886, 54545, 18558, 6}
|
||||
}};
|
||||
|
||||
/* 1 / 31! */
|
||||
static const N i31fac = {{
|
||||
1, -7,
|
||||
{28087, 45433, 51357, 24545, 14291, 3954, 57879, 8109, 38716, 41382}
|
||||
}};
|
||||
|
||||
|
||||
/* 1 / 32! */
|
||||
static const N i32fac = {{
|
||||
1, -7,
|
||||
{52078, 60811, 3652, 39679, 37310, 47227, 28432, 57597, 13497, 1293}
|
||||
}};
|
||||
|
||||
/*
 * b = sin(a), from the Taylor series truncated after the a^31 term.
 *
 * The series is evaluated in nested form starting from 1/31!: each step
 * multiplies the running coefficient by i * (i - 1) (turning 1/i! into
 * 1/(i-2)!) and replaces b by coefficient - a^2 * b.  A final
 * multiplication by a restores the odd power.
 */
static void msin(const N a, N b)
{
    N asq, coef, tmp;
    int i = 31;

    cpy(i31fac, coef);
    cpy(coef, b);
    mul(a, a, asq);

    /* Taylor */
    while (i > 1) {
        fromshort(i * (i - 1), tmp);
        mul(tmp, coef, coef);
        mul(asq, b, tmp);
        sub(coef, tmp, b);
        i -= 2;
    }
    mul(a, b, b);
}
|
||||
|
||||
/*
 * b = cos(a), from the Taylor series truncated after the a^32 term.
 *
 * The series is evaluated in nested form starting from 1/32!: each step
 * multiplies the running coefficient by i * (i - 1) (turning 1/i! into
 * 1/(i-2)!) and replaces b by coefficient - a^2 * b.
 */
static void mcos(const N a, N b)
{
    N asq, coef, tmp;
    int i = 32;

    cpy(i32fac, coef);
    cpy(coef, b);
    mul(a, a, asq);

    /* Taylor */
    while (i > 0) {
        fromshort(i * (i - 1), tmp);
        mul(tmp, coef, coef);
        mul(asq, b, tmp);
        sub(coef, tmp, b);
        i -= 2;
    }
}
|
||||
|
||||
/*
 * a = 2 * pi * m / n, computed as (1 / n) * m * (2 pi) in multiprecision
 * arithmetic.
 */
static void by2pi(REAL m, REAL n, N a)
{
    N denom, numer;

    fromreal(n, denom);
    inv(denom, a);          /* a = 1 / n     */
    fromreal(m, numer);
    mul(a, numer, a);       /* a = m / n     */
    mul(n2pi, a, a);        /* a = 2 pi m / n */
}
|
||||
|
||||
static void sin2pi(REAL m, REAL n, N a);

/*
 * a = cos(2 * pi * m / n).
 *
 * The argument is reduced by symmetry (recursing into cos2pi or sin2pi)
 * until m <= n / 8, at which point the angle is formed by by2pi() and
 * passed to the Taylor series in mcos().
 */
static void cos2pi(REAL m, REAL n, N a)
{
    N angle;

    if (m < 0) {
        cos2pi(-m, n, a);
        return;
    }
    if (m > n * 0.5) {
        cos2pi(n - m, n, a);
        return;
    }
    if (m > n * 0.25) {
        sin2pi(m - n * 0.25, n, a);
        neg(a);
        return;
    }
    if (m > n * 0.125) {
        sin2pi(n * 0.25 - m, n, a);
        return;
    }

    by2pi(m, n, angle);
    mcos(angle, a);
}
|
||||
|
||||
/*
 * a = sin(2 * pi * m / n).
 *
 * The argument is reduced by symmetry (recursing into sin2pi or cos2pi)
 * until m <= n / 8, at which point the angle is formed by by2pi() and
 * passed to the Taylor series in msin().
 */
static void sin2pi(REAL m, REAL n, N a)
{
    N angle;

    if (m < 0) {
        sin2pi(-m, n, a);
        neg(a);
        return;
    }
    if (m > n * 0.5) {
        sin2pi(n - m, n, a);
        neg(a);
        return;
    }
    if (m > n * 0.25) {
        cos2pi(m - n * 0.25, n, a);
        return;
    }
    if (m > n * 0.125) {
        cos2pi(n * 0.25 - m, n, a);
        return;
    }

    by2pi(m, n, angle);
    msin(angle, a);
}
|
||||
|
||||
/*----------------------------------------------------------------------*/
|
||||
/* FFT stuff */
|
||||
|
||||
/* (r0 + i i0)(r1 + i i1) */
|
||||
static void cmul(N r0, N i0, N r1, N i1, N r2, N i2)
|
||||
{
|
||||
N s, t, q;
|
||||
mul(r0, r1, s);
|
||||
mul(i0, i1, t);
|
||||
sub(s, t, q);
|
||||
mul(r0, i1, s);
|
||||
mul(i0, r1, t);
|
||||
add(s, t, i2);
|
||||
cpy(q, r2);
|
||||
}
|
||||
|
||||
/* (r0 - i i0)(r1 + i i1) */
|
||||
static void cmulj(N r0, N i0, N r1, N i1, N r2, N i2)
|
||||
{
|
||||
N s, t, q;
|
||||
mul(r0, r1, s);
|
||||
mul(i0, i1, t);
|
||||
add(s, t, q);
|
||||
mul(r0, i1, s);
|
||||
mul(i0, r1, t);
|
||||
sub(s, t, i2);
|
||||
cpy(q, r2);
|
||||
}
|
||||
|
||||
/*
 * (r + i i) = exp(2 * pi * i * m / n).
 *
 * A static table holds cos/sin of 2*pi*j/n for j = 1, 2, 4, ... < n; it
 * is rebuilt whenever n differs from the previous call.  The result is
 * the product of the table entries selected by the set bits of |m|,
 * using conjugate multiplication when m is not positive.
 */
static void mcexp(int m, int n, N r, N i)
{
    static int cached_n = -1;
    static N w[64][2];
    const int conjugate = !(m > 0);
    int k, j;

    if (n != cached_n) {
        k = 0;
        for (j = 1; j < n; j += j) {
            cos2pi(j, n, w[k][0]);
            sin2pi(j, n, w[k][1]);
            ++k;
        }
        cached_n = n;
    }

    fromshort(1, r);
    fromshort(0, i);

    if (conjugate)
        m = -m;

    for (k = 0; m; ++k, m >>= 1) {
        if (!(m & 1))
            continue;
        if (conjugate)
            cmulj(w[k][0], w[k][1], r, i, r, i);
        else
            cmul(w[k][0], w[k][1], r, i, r, i);
    }
}
|
||||
|
||||
/*
 * Permute the n complex numbers in a (stored as interleaved re/im pairs)
 * into bit-reversed order, in place.  j is maintained as a bit-reversed
 * counter alongside i; each pair (i, j) with i < j is swapped once.
 */
static void bitrev(int n, N *a)
{
    int i, j = 0, m, part;

    for (i = 0; i < n - 1; ++i) {
        if (i < j) {
            N tmp;

            /* swap real parts, then imaginary parts */
            for (part = 0; part < 2; ++part) {
                cpy(a[2*i + part], tmp);
                cpy(a[2*j + part], a[2*i + part]);
                cpy(tmp, a[2*j + part]);
            }
        }

        /* bit reversed counter */
        for (m = n >> 1; ; m >>= 1) {
            j ^= m;
            if (j & m)
                break;
        }
    }
}
|
||||
|
||||
/*
 * In-place radix-2 FFT of the n complex numbers in a (interleaved re/im
 * pairs).  The data is bit-reversed first and then combined in stages of
 * doubling span i; the twiddle factor for offset j within a stage is
 * exp(2 * pi * i * sign * j / (2 * i)).
 */
static void fft0(int n, N *a, int sign)
{
    int i, j, k;

    bitrev(n, a);
    for (i = 1; i < n; i = 2 * i) {
        for (j = 0; j < i; ++j) {
            N wr, wi;

            mcexp(sign * (int)j, 2 * i, wr, wi);
            for (k = j; k < n; k += 2 * i) {
                N *lo = a + 2 * k;
                N *hi = lo + 2 * i;
                N re0, im0, tr, ti;

                /* keep the old low element: it is overwritten below */
                cpy(lo[0], re0);
                cpy(lo[1], im0);

                /* (tr + i ti) = hi * w */
                cmul(hi[0], hi[1], wr, wi, tr, ti);

                add(re0, tr, lo[0]);
                add(im0, ti, lo[1]);
                sub(re0, tr, hi[0]);
                sub(im0, ti, hi[1]);
            }
        }
    }
}
|
||||
|
||||
/* a[2*k]+i*a[2*k+1] = exp(2*pi*i*k^2/(2*n)) */
|
||||
static void bluestein_sequence(int n, N *a)
|
||||
{
|
||||
int k, ksq, n2 = 2 * n;
|
||||
|
||||
ksq = 1; /* (-1)^2 */
|
||||
for (k = 0; k < n; ++k) {
|
||||
/* careful with overflow */
|
||||
ksq = ksq + 2*k - 1; while (ksq > n2) ksq -= n2;
|
||||
mcexp(ksq, n2, a[2*k], a[2*k+1]);
|
||||
}
|
||||
}
|
||||
|
||||
/* Smallest power of two that is >= x; returns 1 for any x <= 1. */
static int pow2_atleast(int x)
{
    int h = 1;

    while (h < x)
        h = 2 * h;
    return h;
}
|
||||
|
||||
/* Tables reused between bluestein() calls that have the same n. */
static N *cached_bluestein_w = 0;   /* chirp sequence, n complex values */
static N *cached_bluestein_y = 0;   /* transformed, padded chirp, nb complex values */
static int cached_bluestein_n = -1; /* n the tables were built for */

/*
 * In-place transform of the n complex numbers in a (interleaved re/im
 * pairs) for arbitrary n, via Bluestein's algorithm: the input is
 * multiplied by the conjugate chirp, convolved with the chirp using
 * power-of-two FFTs of size nb >= 2n, multiplied by the conjugate chirp
 * again and scaled by 1/nb.
 *
 * The chirp w and its padded transform y are cached in the file-level
 * statics and rebuilt only when n changes.
 */
static void bluestein(int n, N *a)
{
    const int nb = pow2_atleast(2 * n);
    N *work = (N *)bench_malloc(2 * nb * sizeof(N));
    N *w = cached_bluestein_w;
    N *y = cached_bluestein_y;
    N scale;
    int i;

    fromreal(1.0 / nb, scale); /* exact because nb = 2^k */

    if (cached_bluestein_n != n) {
        if (w) bench_free(w);
        if (y) bench_free(y);
        w = (N *)bench_malloc(2 * n * sizeof(N));
        y = (N *)bench_malloc(2 * nb * sizeof(N));
        cached_bluestein_n = n;
        cached_bluestein_w = w;
        cached_bluestein_y = y;

        bluestein_sequence(n, w);

        for (i = 0; i < 2*nb; ++i)
            cpy(zero, y[i]);

        /* y holds w at indices 0..n-1 and mirrored at nb-1..nb-n+1 */
        for (i = 0; i < n; ++i) {
            cpy(w[2*i], y[2*i]);
            cpy(w[2*i+1], y[2*i+1]);
            if (i > 0) {
                cpy(w[2*i], y[2*(nb-i)]);
                cpy(w[2*i+1], y[2*(nb-i)+1]);
            }
        }

        fft0(nb, y, -1);
    }

    for (i = 0; i < 2*nb; ++i)
        cpy(zero, work[i]);

    for (i = 0; i < n; ++i)
        cmulj(w[2*i], w[2*i+1], a[2*i], a[2*i+1],
              work[2*i], work[2*i+1]);

    /* scaled convolution b * y */
    fft0(nb, work, -1);

    for (i = 0; i < nb; ++i)
        cmul(work[2*i], work[2*i+1], y[2*i], y[2*i+1],
             work[2*i], work[2*i+1]);

    fft0(nb, work, 1);

    for (i = 0; i < n; ++i) {
        cmulj(w[2*i], w[2*i+1], work[2*i], work[2*i+1],
              a[2*i], a[2*i+1]);
        mul(scale, a[2*i], a[2*i]);
        mul(scale, a[2*i+1], a[2*i+1]);
    }

    bench_free(work);
}
|
||||
|
||||
/*
 * Exchange the real and imaginary parts of each of the n complex
 * numbers in a (interleaved re/im pairs).
 */
static void swapri(int n, N *a)
{
    N *re = a;
    int left = n;

    while (left-- > 0) {
        N tmp;

        cpy(re[0], tmp);
        cpy(re[1], re[0]);
        cpy(tmp, re[1]);
        re += 2;
    }
}
|
||||
|
||||
/*
 * Transform the n complex numbers in a with the given sign.  Powers of
 * two go straight to fft0(); other sizes go through bluestein(), with
 * the real and imaginary parts swapped before and after when sign is 1.
 */
static void fft1(int n, N *a, int sign)
{
    const int swap_parts = (sign == 1);

    if (power_of_two(n)) {
        fft0(n, a, sign);
        return;
    }

    if (swap_parts) swapri(n, a);
    bluestein(n, a);
    if (swap_parts) swapri(n, a);
}
|
||||
|
||||
/*
 * Convert n complex values from a into multiprecision form: the real
 * part of a[i] goes to b[2*i], the imaginary part to b[2*i + 1].
 */
static void fromrealv(int n, bench_complex *a, N *b)
{
    N *dst = b;
    int i;

    for (i = 0; i < n; ++i, dst += 2) {
        fromreal(c_re(a[i]), dst[0]);
        fromreal(c_im(a[i]), dst[1]);
    }
}
|
||||
|
||||
/*
 * Relative difference between the 2*n numbers in a and in b.
 *
 * With x = |a[i]| and d = |a[i] - b[i]| (both converted to double):
 *   err[0] = sum(d) / sum(x)                (L1)
 *   err[1] = sqrt(sum(d^2) / sum(x^2))      (L2)
 *   err[2] = max(d) / max(x)                (L-infinity)
 */
static void compare(int n, N *a, N *b, double *err)
{
    double e1 = 0.0, e2 = 0.0, einf = 0.0;  /* norms of the difference */
    double n1 = 0.0, n2 = 0.0, ninf = 0.0;  /* norms of a */
    int i;

    for (i = 0; i < 2 * n; ++i) {
        N diff;
        double ref, dev;

        sub(a[i], b[i], diff);

        ref = toreal(a[i]);
        if (ref < 0) ref = -ref;
        n1 += ref;
        n2 += ref * ref;
        if (ref > ninf) ninf = ref;

        dev = toreal(diff);
        if (dev < 0) dev = -dev;
        e1 += dev;
        e2 += dev * dev;
        if (dev > einf) einf = dev;
    }

    err[0] = e1 / n1;
    err[1] = sqrt(e2 / n2);
    err[2] = einf / ninf;
}
|
||||
|
||||
/*
 * Measure the accuracy of a computed transform `ffta` of the n-point
 * input `a` against the multiprecision transform implemented here.
 *
 *   err[0..2]: forward error  -- the reference transform of a, compared
 *              with ffta.
 *   err[3..5]: backward error -- a, compared with the reference
 *              transform of ffta/n taken with the opposite sign.
 * Each triple is the L1, L2 and L-infinity relative error from compare().
 */
void fftaccuracy(int n, bench_complex *a, bench_complex *ffta,
                 int sign, double err[6])
{
    N *in = (N *)bench_malloc(2 * n * sizeof(N));
    N *out = (N *)bench_malloc(2 * n * sizeof(N));
    N nn, ninv;
    int i;

    fromreal(n, nn);
    inv(nn, ninv);

    /* forward error */
    fromrealv(n, a, in);
    fromrealv(n, ffta, out);
    fft1(n, in, sign);
    compare(n, in, out, err);

    /* backward error */
    fromrealv(n, a, in);
    fromrealv(n, ffta, out);
    for (i = 0; i < 2 * n; ++i)
        mul(out[i], ninv, out[i]);
    fft1(n, out, -sign);
    compare(n, in, out, err + 3);

    bench_free(out);
    bench_free(in);
}
|
||||
|
||||
void fftaccuracy_done(void)
|
||||
{
|
||||
if (cached_bluestein_w) bench_free(cached_bluestein_w);
|
||||
if (cached_bluestein_y) bench_free(cached_bluestein_y);
|
||||
cached_bluestein_w = 0;
|
||||
cached_bluestein_y = 0;
|
||||
cached_bluestein_n = -1;
|
||||
}
|
||||
172
fftw-3.3.10/libbench2/my-getopt.c
Normal file
172
fftw-3.3.10/libbench2/my-getopt.c
Normal file
@@ -0,0 +1,172 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "config.h"
|
||||
#include "my-getopt.h"
|
||||
|
||||
/* Index of the next argv[] element that my_getopt() will examine. */
int my_optind = 1;

/* Argument of the option most recently returned by my_getopt(), or 0. */
const char *my_optarg = 0;

/* Position inside an argv[] element holding bundled short options;
   0 or pointing at '\0' when a fresh element must be fetched. */
static const char *scan_pointer = 0;
|
||||
|
||||
void my_usage(const char *progname, const struct my_option *opt)
|
||||
{
|
||||
int i;
|
||||
size_t col = 0;
|
||||
|
||||
fprintf(stdout, "Usage: %s", progname);
|
||||
col += (strlen(progname) + 7);
|
||||
for (i = 0; opt[i].long_name; i++) {
|
||||
size_t option_len;
|
||||
|
||||
option_len = strlen(opt[i].long_name);
|
||||
if (col >= 80 - (option_len + 16)) {
|
||||
fputs("\n\t", stdout);
|
||||
col = 8;
|
||||
}
|
||||
fprintf(stdout, " [--%s", opt[i].long_name);
|
||||
col += (option_len + 4);
|
||||
if (opt[i].short_name < 128) {
|
||||
fprintf(stdout, " | -%c", opt[i].short_name);
|
||||
col += 5;
|
||||
}
|
||||
switch (opt[i].argtype) {
|
||||
case REQARG:
|
||||
fputs(" arg]", stdout);
|
||||
col += 5;
|
||||
break;
|
||||
case OPTARG:
|
||||
fputs(" [arg]]", stdout);
|
||||
col += 10;
|
||||
break;
|
||||
default:
|
||||
fputs("]", stdout);
|
||||
col++;
|
||||
}
|
||||
}
|
||||
|
||||
fputs ("\n", stdout);
|
||||
}
|
||||
|
||||
int my_getopt(int argc, char *argv[], const struct my_option *optarray)
|
||||
{
|
||||
const char *p;
|
||||
const struct my_option *l;
|
||||
|
||||
if (scan_pointer && *scan_pointer) {
|
||||
/* continue a previously scanned argv[] element */
|
||||
p = scan_pointer;
|
||||
goto short_option;
|
||||
} else {
|
||||
/* new argv[] element */
|
||||
if (my_optind >= argc)
|
||||
return -1; /* no more options */
|
||||
|
||||
p = argv[my_optind];
|
||||
|
||||
if (*p++ != '-')
|
||||
return (-1); /* not an option */
|
||||
|
||||
if (!*p)
|
||||
return (-1); /* string is exactly '-' */
|
||||
|
||||
++my_optind;
|
||||
}
|
||||
|
||||
if (*p == '-') {
|
||||
/* long option */
|
||||
scan_pointer = 0;
|
||||
my_optarg = 0;
|
||||
|
||||
++p;
|
||||
|
||||
for (l = optarray; l->short_name; ++l) {
|
||||
size_t len = strlen(l->long_name);
|
||||
if (!strncmp(l->long_name, p, len) &&
|
||||
(!p[len] || p[len] == '=')) {
|
||||
switch (l->argtype) {
|
||||
case NOARG:
|
||||
goto ok;
|
||||
case OPTARG:
|
||||
if (p[len] == '=')
|
||||
my_optarg = p + len + 1;
|
||||
goto ok;
|
||||
case REQARG:
|
||||
if (p[len] == '=') {
|
||||
my_optarg = p + len + 1;
|
||||
goto ok;
|
||||
}
|
||||
if (my_optind >= argc) {
|
||||
fprintf(stderr,
|
||||
"option --%s requires an argument\n",
|
||||
l->long_name);
|
||||
return '?';
|
||||
}
|
||||
my_optarg = argv[my_optind];
|
||||
++my_optind;
|
||||
goto ok;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
short_option:
|
||||
scan_pointer = 0;
|
||||
my_optarg = 0;
|
||||
|
||||
for (l = optarray; l->short_name; ++l) {
|
||||
if (l->short_name == (char)l->short_name &&
|
||||
*p == l->short_name) {
|
||||
++p;
|
||||
switch (l->argtype) {
|
||||
case NOARG:
|
||||
scan_pointer = p;
|
||||
goto ok;
|
||||
case OPTARG:
|
||||
if (*p)
|
||||
my_optarg = p;
|
||||
goto ok;
|
||||
case REQARG:
|
||||
if (*p) {
|
||||
my_optarg = p;
|
||||
} else {
|
||||
if (my_optind >= argc) {
|
||||
fprintf(stderr,
|
||||
"option -%c requires an argument\n",
|
||||
l->short_name);
|
||||
return '?';
|
||||
}
|
||||
my_optarg = argv[my_optind];
|
||||
++my_optind;
|
||||
}
|
||||
goto ok;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fprintf(stderr, "unrecognized option %s\n", argv[my_optind - 1]);
|
||||
return '?';
|
||||
|
||||
ok:
|
||||
return l->short_name;
|
||||
}
|
||||
|
||||
46
fftw-3.3.10/libbench2/my-getopt.h
Normal file
46
fftw-3.3.10/libbench2/my-getopt.h
Normal file
@@ -0,0 +1,46 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef __MY_GETOPT_H__
#define __MY_GETOPT_H__

#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */

/* Kinds of option argument: required, optional, or none. */
enum {
    REQARG = 0,
    OPTARG = 1,
    NOARG  = 2
};

/* One entry of the option table passed to my_getopt()/my_usage(). */
struct my_option {
    const char *long_name;  /* name used as "--long_name" */
    int argtype;            /* REQARG, OPTARG or NOARG */
    int short_name;         /* value returned by my_getopt() for this option */
};

/* index of the next argv[] element to be examined */
extern int my_optind;
/* argument of the option last returned, or 0 */
extern const char *my_optarg;

extern void my_usage(const char *progname, const struct my_option *opt);
extern int my_getopt(int argc, char *argv[], const struct my_option *optarray);

#ifdef __cplusplus
}                               /* extern "C" */
#endif /* __cplusplus */

#endif /* __MY_GETOPT_H__ */
|
||||
28
fftw-3.3.10/libbench2/ovtpvt.c
Normal file
28
fftw-3.3.10/libbench2/ovtpvt.c
Normal file
@@ -0,0 +1,28 @@
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <stdarg.h>
|
||||
#include "libbench2/bench.h"
|
||||
|
||||
void ovtpvt(const char *format, ...)
|
||||
{
|
||||
va_list ap;
|
||||
|
||||
va_start(ap, format);
|
||||
if (verbose >= 0)
|
||||
vfprintf(stdout, format, ap);
|
||||
va_end(ap);
|
||||
fflush(stdout);
|
||||
}
|
||||
|
||||
void ovtpvt_err(const char *format, ...)
|
||||
{
|
||||
va_list ap;
|
||||
|
||||
va_start(ap, format);
|
||||
if (verbose >= 0) {
|
||||
fflush(stdout);
|
||||
vfprintf(stderr, format, ap);
|
||||
}
|
||||
va_end(ap);
|
||||
fflush(stdout);
|
||||
}
|
||||
6
fftw-3.3.10/libbench2/pow2.c
Normal file
6
fftw-3.3.10/libbench2/pow2.c
Normal file
@@ -0,0 +1,6 @@
|
||||
#include "libbench2/bench.h"
|
||||
|
||||
/* Returns 1 if n is a positive power of two, 0 otherwise. */
int power_of_two(int n)
{
    if (n <= 0)
        return 0;

    /* a power of two has a single bit set, which n - 1 clears */
    return (n & (n - 1)) == 0;
}
|
||||
328
fftw-3.3.10/libbench2/problem.c
Normal file
328
fftw-3.3.10/libbench2/problem.c
Normal file
@@ -0,0 +1,328 @@
|
||||
/*
|
||||
* Copyright (c) 2001 Matteo Frigo
|
||||
* Copyright (c) 2001 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include "config.h"
|
||||
#include "libbench2/bench.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
|
||||
/* by default, only pad in-place case */
int always_pad_real = 0;

/* How transform_n() maps the length n of a dimension. */
typedef enum {
    SAME,       /* n           */
    PADDED,     /* 2*(n/2 + 1) */
    HALFISH     /* n/2 + 1     */
} n_transform;
|
||||
|
||||
/* funny transformations for last dimension of PROBLEM_REAL */
|
||||
static int transform_n(int n, n_transform nt)
|
||||
{
|
||||
switch (nt) {
|
||||
case SAME: return n;
|
||||
case PADDED: return 2*(n/2+1);
|
||||
case HALFISH: return (n/2+1);
|
||||
default: BENCH_ASSERT(0); return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* do what I mean */
|
||||
static bench_tensor *dwim(bench_tensor *t, bench_iodim **last_iodim,
|
||||
n_transform nti, n_transform nto,
|
||||
bench_iodim *dt)
|
||||
{
|
||||
int i;
|
||||
bench_iodim *d, *d1;
|
||||
|
||||
if (!BENCH_FINITE_RNK(t->rnk) || t->rnk < 1)
|
||||
return t;
|
||||
|
||||
i = t->rnk;
|
||||
d1 = *last_iodim;
|
||||
|
||||
while (--i >= 0) {
|
||||
d = t->dims + i;
|
||||
if (!d->is)
|
||||
d->is = d1->is * transform_n(d1->n, d1==dt ? nti : SAME);
|
||||
if (!d->os)
|
||||
d->os = d1->os * transform_n(d1->n, d1==dt ? nto : SAME);
|
||||
d1 = d;
|
||||
}
|
||||
|
||||
*last_iodim = d1;
|
||||
return t;
|
||||
}
|
||||
|
||||
/*
 * Rewrite the output strides of the first two dimensions of t:
 * dimension 0 takes dimension 1's output stride, and dimension 1 takes
 * that stride times the length of dimension 0.  Tensors whose rank is
 * not finite or is below 2 are left unchanged.
 */
static void transpose_tensor(bench_tensor *t)
{
    if (BENCH_FINITE_RNK(t->rnk) && t->rnk >= 2) {
        t->dims[0].os = t->dims[1].os;
        /* dims[0].os already holds the new value here */
        t->dims[1].os = t->dims[0].os * t->dims[0].n;
    }
}
|
||||
|
||||
/*
 * Parse a decimal integer at s into *n and return a pointer to the
 * first character after it.
 *
 * Accepted form: an optional '+' or '-', one or more digits (asserted
 * via BENCH_ASSERT), then an optional 'k'/'K' suffix multiplying the
 * value by 1024 and an optional 'm'/'M' suffix multiplying it by
 * 1024 * 1024.  The two suffix checks are independent, so "k" followed
 * by "m" applies both.
 */
static const char *parseint(const char *s, int *n)
{
    int sign = 1;

    *n = 0;

    if (*s == '-') {
        sign = -1;
        ++s;
    } else if (*s == '+') {
        sign = +1;
        ++s;
    }

    /* isdigit() is only defined for values representable as unsigned
       char (or EOF), so a plain char must be converted first. */
    BENCH_ASSERT(isdigit((unsigned char)*s));
    while (isdigit((unsigned char)*s)) {
        *n = *n * 10 + (*s - '0');
        ++s;
    }

    *n *= sign;

    if (*s == 'k' || *s == 'K') {
        *n *= 1024;
        ++s;
    }

    if (*s == 'm' || *s == 'M') {
        *n *= 1024 * 1024;
        ++s;
    }

    return s;
}
|
||||
|
||||
struct dimlist { bench_iodim car; r2r_kind_t k; struct dimlist *cdr; };
|
||||
|
||||
static const char *parsetensor(const char *s, bench_tensor **tp,
|
||||
r2r_kind_t **k)
|
||||
{
|
||||
struct dimlist *l = 0, *m;
|
||||
bench_tensor *t;
|
||||
int rnk = 0;
|
||||
|
||||
L1:
|
||||
m = (struct dimlist *)bench_malloc(sizeof(struct dimlist));
|
||||
/* nconc onto l */
|
||||
m->cdr = l; l = m;
|
||||
++rnk;
|
||||
|
||||
s = parseint(s, &m->car.n);
|
||||
|
||||
if (*s == ':') {
|
||||
/* read input stride */
|
||||
++s;
|
||||
s = parseint(s, &m->car.is);
|
||||
if (*s == ':') {
|
||||
/* read output stride */
|
||||
++s;
|
||||
s = parseint(s, &m->car.os);
|
||||
} else {
|
||||
/* default */
|
||||
m->car.os = m->car.is;
|
||||
}
|
||||
} else {
|
||||
m->car.is = 0;
|
||||
m->car.os = 0;
|
||||
}
|
||||
|
||||
if (*s == 'f' || *s == 'F') {
|
||||
m->k = R2R_R2HC;
|
||||
++s;
|
||||
}
|
||||
else if (*s == 'b' || *s == 'B') {
|
||||
m->k = R2R_HC2R;
|
||||
++s;
|
||||
}
|
||||
else if (*s == 'h' || *s == 'H') {
|
||||
m->k = R2R_DHT;
|
||||
++s;
|
||||
}
|
||||
else if (*s == 'e' || *s == 'E' || *s == 'o' || *s == 'O') {
|
||||
char c = *(s++);
|
||||
int ab;
|
||||
|
||||
s = parseint(s, &ab);
|
||||
|
||||
if (c == 'e' || c == 'E') {
|
||||
if (ab == 0)
|
||||
m->k = R2R_REDFT00;
|
||||
else if (ab == 1)
|
||||
m->k = R2R_REDFT01;
|
||||
else if (ab == 10)
|
||||
m->k = R2R_REDFT10;
|
||||
else if (ab == 11)
|
||||
m->k = R2R_REDFT11;
|
||||
else
|
||||
BENCH_ASSERT(0);
|
||||
}
|
||||
else {
|
||||
if (ab == 0)
|
||||
m->k = R2R_RODFT00;
|
||||
else if (ab == 1)
|
||||
m->k = R2R_RODFT01;
|
||||
else if (ab == 10)
|
||||
m->k = R2R_RODFT10;
|
||||
else if (ab == 11)
|
||||
m->k = R2R_RODFT11;
|
||||
else
|
||||
BENCH_ASSERT(0);
|
||||
}
|
||||
}
|
||||
else
|
||||
m->k = R2R_R2HC;
|
||||
|
||||
if (*s == 'x' || *s == 'X') {
|
||||
++s;
|
||||
goto L1;
|
||||
}
|
||||
|
||||
/* now we have a dimlist. Build bench_tensor, etc. */
|
||||
|
||||
if (k && rnk > 0) {
|
||||
int i;
|
||||
*k = (r2r_kind_t *) bench_malloc(sizeof(r2r_kind_t) * rnk);
|
||||
for (m = l, i = rnk - 1; i >= 0; --i, m = m->cdr) {
|
||||
BENCH_ASSERT(m);
|
||||
(*k)[i] = m->k;
|
||||
}
|
||||
}
|
||||
|
||||
t = mktensor(rnk);
|
||||
while (--rnk >= 0) {
|
||||
bench_iodim *d = t->dims + rnk;
|
||||
BENCH_ASSERT(l);
|
||||
m = l; l = m->cdr;
|
||||
d->n = m->car.n;
|
||||
d->is = m->car.is;
|
||||
d->os = m->car.os;
|
||||
bench_free(m);
|
||||
}
|
||||
|
||||
*tp = t;
|
||||
return s;
|
||||
}
|
||||
|
||||
/* parse a problem description, return a problem */
|
||||
bench_problem *problem_parse(const char *s)
|
||||
{
|
||||
bench_problem *p;
|
||||
bench_iodim last_iodim0 = {1,1,1}, *last_iodim = &last_iodim0;
|
||||
bench_iodim *sz_last_iodim;
|
||||
bench_tensor *sz;
|
||||
n_transform nti = SAME, nto = SAME;
|
||||
int transpose = 0;
|
||||
|
||||
p = (bench_problem *) bench_malloc(sizeof(bench_problem));
|
||||
p->kind = PROBLEM_COMPLEX;
|
||||
p->k = 0;
|
||||
p->sign = -1;
|
||||
p->in = p->out = 0;
|
||||
p->inphys = p->outphys = 0;
|
||||
p->iphyssz = p->ophyssz = 0;
|
||||
p->in_place = 0;
|
||||
p->destroy_input = 0;
|
||||
p->split = 0;
|
||||
p->userinfo = 0;
|
||||
p->scrambled_in = p->scrambled_out = 0;
|
||||
p->sz = p->vecsz = 0;
|
||||
p->ini = p->outi = 0;
|
||||
p->pstring = (char *) bench_malloc(sizeof(char) * (strlen(s) + 1));
|
||||
strcpy(p->pstring, s);
|
||||
|
||||
L1:
|
||||
switch (tolower(*s)) {
|
||||
case 'i': p->in_place = 1; ++s; goto L1;
|
||||
case 'o': p->in_place = 0; ++s; goto L1;
|
||||
case 'd': p->destroy_input = 1; ++s; goto L1;
|
||||
case '/': p->split = 1; ++s; goto L1;
|
||||
case 'f':
|
||||
case '-': p->sign = -1; ++s; goto L1;
|
||||
case 'b':
|
||||
case '+': p->sign = 1; ++s; goto L1;
|
||||
case 'r': p->kind = PROBLEM_REAL; ++s; goto L1;
|
||||
case 'c': p->kind = PROBLEM_COMPLEX; ++s; goto L1;
|
||||
case 'k': p->kind = PROBLEM_R2R; ++s; goto L1;
|
||||
case 't': transpose = 1; ++s; goto L1;
|
||||
|
||||
/* hack for MPI: */
|
||||
case '[': p->scrambled_in = 1; ++s; goto L1;
|
||||
case ']': p->scrambled_out = 1; ++s; goto L1;
|
||||
|
||||
default : ;
|
||||
}
|
||||
|
||||
s = parsetensor(s, &sz, p->kind == PROBLEM_R2R ? &p->k : 0);
|
||||
|
||||
if (p->kind == PROBLEM_REAL) {
|
||||
if (p->sign < 0) {
|
||||
nti = p->in_place || always_pad_real ? PADDED : SAME;
|
||||
nto = HALFISH;
|
||||
}
|
||||
else {
|
||||
nti = HALFISH;
|
||||
nto = p->in_place || always_pad_real ? PADDED : SAME;
|
||||
}
|
||||
}
|
||||
|
||||
sz_last_iodim = sz->dims + sz->rnk - 1;
|
||||
if (*s == '*') { /* "external" vector */
|
||||
++s;
|
||||
p->sz = dwim(sz, &last_iodim, nti, nto, sz_last_iodim);
|
||||
s = parsetensor(s, &sz, 0);
|
||||
p->vecsz = dwim(sz, &last_iodim, nti, nto, sz_last_iodim);
|
||||
} else if (*s == 'v' || *s == 'V') { /* "internal" vector */
|
||||
bench_tensor *vecsz;
|
||||
++s;
|
||||
s = parsetensor(s, &vecsz, 0);
|
||||
p->vecsz = dwim(vecsz, &last_iodim, nti, nto, sz_last_iodim);
|
||||
p->sz = dwim(sz, &last_iodim, nti, nto, sz_last_iodim);
|
||||
} else {
|
||||
p->sz = dwim(sz, &last_iodim, nti, nto, sz_last_iodim);
|
||||
p->vecsz = mktensor(0);
|
||||
}
|
||||
|
||||
if (transpose) {
|
||||
transpose_tensor(p->sz);
|
||||
transpose_tensor(p->vecsz);
|
||||
}
|
||||
|
||||
if (!p->in_place)
|
||||
p->out = ((bench_real *) p->in) + (1 << 20); /* whatever */
|
||||
|
||||
BENCH_ASSERT(p->sz && p->vecsz);
|
||||
BENCH_ASSERT(!*s);
|
||||
return p;
|
||||
}
|
||||
|
||||
/*
 * Release a problem: problem_free() is called first, then the
 * optional r2r kind array and the saved problem string, and finally
 * the bench_problem structure itself.
 */
void problem_destroy(bench_problem *p)
{
     BENCH_ASSERT(p);

     problem_free(p);

     /* both members may legitimately be null */
     if (p->k)
	  bench_free(p->k);
     if (p->pstring)
	  bench_free(p->pstring);

     bench_free(p);
}
|
||||
|
||||
137
fftw-3.3.10/libbench2/report.c
Normal file
137
fftw-3.3.10/libbench2/report.c
Normal file
@@ -0,0 +1,137 @@
|
||||
/*
|
||||
* Copyright (c) 2001 Matteo Frigo
|
||||
* Copyright (c) 2001 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include "libbench2/bench.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
|
||||
void (*report)(const bench_problem *p, double *t, int st);
|
||||
|
||||
#undef min
|
||||
#undef max /* you never know */
|
||||
|
||||
/* Summary statistics over one set of timing measurements. */
struct stats {
     double min;
     double max;
     double avg;
     double median;
};

/*
 * Compute min, max, average and median of the ST measurements in T
 * and store them in *A.
 *
 * T is sorted in place (ascending) as a side effect.  For an even
 * number of samples the upper of the two middle elements is reported
 * as the median.  When ST <= 0 there is nothing to measure and every
 * field is set to 0.
 */
static void mkstat(double *t, int st, struct stats *a)
{
     int i, j;

     if (st <= 0) {
	  /* avoid reading t[0] and dividing by zero */
	  a->min = a->max = a->avg = a->median = 0.0;
	  return;
     }

     a->min = t[0];
     a->max = t[0];
     a->avg = 0.0;

     for (i = 0; i < st; ++i) {
	  if (t[i] < a->min)
	       a->min = t[i];
	  if (t[i] > a->max)
	       a->max = t[i];
	  a->avg += t[i];
     }
     a->avg /= (double)st;

     /* compute median --- silly bubblesort algorithm.  After each
	outer pass the largest remaining value sits at t[i]; the
	bounds must cover t[st - 1] as well, otherwise the last
	sample is never sorted into place. */
     for (i = st - 1; i > 0; --i) {
	  for (j = 0; j < i; ++j) {
	       double t0, t1;
	       if ((t0 = t[j]) > (t1 = t[j + 1])) {
		    t[j] = t1;
		    t[j + 1] = t0;
	       }
	  }
     }
     a->median = t[st / 2];
}
|
||||
|
||||
void report_mflops(const bench_problem *p, double *t, int st)
|
||||
{
|
||||
struct stats s;
|
||||
mkstat(t, st, &s);
|
||||
ovtpvt("(%g %g %g %g)\n",
|
||||
mflops(p, s.max), mflops(p, s.avg),
|
||||
mflops(p, s.min), mflops(p, s.median));
|
||||
}
|
||||
|
||||
void report_time(const bench_problem *p, double *t, int st)
|
||||
{
|
||||
struct stats s;
|
||||
UNUSED(p);
|
||||
mkstat(t, st, &s);
|
||||
ovtpvt("(%g %g %g %g)\n", s.min, s.avg, s.max, s.median);
|
||||
}
|
||||
|
||||
void report_benchmark(const bench_problem *p, double *t, int st)
|
||||
{
|
||||
struct stats s;
|
||||
mkstat(t, st, &s);
|
||||
ovtpvt("%.8g %.8g %g\n", mflops(p, s.min), s.min, p->setup_time);
|
||||
}
|
||||
|
||||
/*
 * Format the time X (in seconds) into BUF with a unit chosen by
 * magnitude: ns below 1e-6, us below 1e-3, ms below 1, s otherwise.
 * BUFLEN is only honoured when snprintf() is available.
 */
static void sprintf_time(double x, char *buf, int buflen)
{
#ifdef HAVE_SNPRINTF
#  define FORMAT_TIME(fmt, val) snprintf(buf, buflen, fmt, val)
#else
#  define FORMAT_TIME(fmt, val) sprintf(buf, fmt, val)
#endif

     if (x < 1.0E-6) {
	  FORMAT_TIME("%.2f ns", x * 1.0E9);
	  return;
     }
     if (x < 1.0E-3) {
	  FORMAT_TIME("%.2f us", x * 1.0E6);
	  return;
     }
     if (x < 1.0) {
	  FORMAT_TIME("%.2f ms", x * 1.0E3);
	  return;
     }
     FORMAT_TIME("%.2f s", x);

#undef FORMAT_TIME
}
|
||||
|
||||
void report_verbose(const bench_problem *p, double *t, int st)
|
||||
{
|
||||
struct stats s;
|
||||
char bmin[64], bmax[64], bavg[64], bmedian[64], btmin[64];
|
||||
char bsetup[64];
|
||||
int copyp = tensor_sz(p->sz) == 1;
|
||||
|
||||
mkstat(t, st, &s);
|
||||
|
||||
sprintf_time(s.min, bmin, 64);
|
||||
sprintf_time(s.max, bmax, 64);
|
||||
sprintf_time(s.avg, bavg, 64);
|
||||
sprintf_time(s.median, bmedian, 64);
|
||||
sprintf_time(time_min, btmin, 64);
|
||||
sprintf_time(p->setup_time, bsetup, 64);
|
||||
|
||||
ovtpvt("Problem: %s, setup: %s, time: %s, %s: %.8g\n",
|
||||
p->pstring, bsetup, bmin,
|
||||
copyp ? "fp-move/us" : "``mflops''",
|
||||
mflops(p, s.min));
|
||||
|
||||
if (verbose) {
|
||||
ovtpvt("Took %d measurements for at least %s each.\n", st, btmin);
|
||||
ovtpvt("Time: min %s, max %s, avg %s, median %s\n",
|
||||
bmin, bmax, bavg, bmedian);
|
||||
}
|
||||
}
|
||||
94
fftw-3.3.10/libbench2/speed.c
Normal file
94
fftw-3.3.10/libbench2/speed.c
Normal file
@@ -0,0 +1,94 @@
|
||||
/*
|
||||
* Copyright (c) 2001 Matteo Frigo
|
||||
* Copyright (c) 2001 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include "libbench2/bench.h"
|
||||
|
||||
int no_speed_allocation = 0; /* 1 to not allocate array data in speed() */
|
||||
|
||||
void speed(const char *param, int setup_only)
|
||||
{
|
||||
double *t;
|
||||
int iter = 0, k;
|
||||
bench_problem *p;
|
||||
double tmin, y;
|
||||
|
||||
t = (double *) bench_malloc(time_repeat * sizeof(double));
|
||||
|
||||
for (k = 0; k < time_repeat; ++k)
|
||||
t[k] = 0;
|
||||
|
||||
p = problem_parse(param);
|
||||
BENCH_ASSERT(can_do(p));
|
||||
if (!no_speed_allocation) {
|
||||
problem_alloc(p);
|
||||
problem_zero(p);
|
||||
}
|
||||
|
||||
timer_start(LIBBENCH_TIMER);
|
||||
setup(p);
|
||||
p->setup_time = bench_cost_postprocess(timer_stop(LIBBENCH_TIMER));
|
||||
|
||||
/* reset the input to zero again, because the planner in paranoid
|
||||
mode sets it to random values, thus making the benchmark
|
||||
diverge. */
|
||||
if (!no_speed_allocation)
|
||||
problem_zero(p);
|
||||
|
||||
if (setup_only)
|
||||
goto done;
|
||||
|
||||
start_over:
|
||||
for (iter = 1; iter < (1<<30); iter *= 2) {
|
||||
tmin = 1.0e20;
|
||||
for (k = 0; k < time_repeat; ++k) {
|
||||
timer_start(LIBBENCH_TIMER);
|
||||
doit(iter, p);
|
||||
y = bench_cost_postprocess(timer_stop(LIBBENCH_TIMER));
|
||||
if (y < 0) /* yes, it happens */
|
||||
goto start_over;
|
||||
t[k] = y;
|
||||
if (y < tmin)
|
||||
tmin = y;
|
||||
}
|
||||
|
||||
if (tmin >= time_min)
|
||||
goto done;
|
||||
}
|
||||
|
||||
goto start_over; /* this also happens */
|
||||
|
||||
done:
|
||||
done(p);
|
||||
|
||||
if (iter)
|
||||
for (k = 0; k < time_repeat; ++k)
|
||||
t[k] /= iter;
|
||||
else
|
||||
for (k = 0; k < time_repeat; ++k)
|
||||
t[k] = 0;
|
||||
|
||||
report(p, t, time_repeat);
|
||||
|
||||
if (!no_speed_allocation)
|
||||
problem_destroy(p);
|
||||
bench_free(t);
|
||||
return;
|
||||
}
|
||||
240
fftw-3.3.10/libbench2/tensor.c
Normal file
240
fftw-3.3.10/libbench2/tensor.c
Normal file
@@ -0,0 +1,240 @@
|
||||
/*
|
||||
* Copyright (c) 2001 Matteo Frigo
|
||||
* Copyright (c) 2001 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
#include "libbench2/bench.h"
|
||||
#include <stdlib.h>
|
||||
|
||||
bench_tensor *mktensor(int rnk)
|
||||
{
|
||||
bench_tensor *x;
|
||||
|
||||
BENCH_ASSERT(rnk >= 0);
|
||||
|
||||
x = (bench_tensor *)bench_malloc(sizeof(bench_tensor));
|
||||
if (BENCH_FINITE_RNK(rnk) && rnk > 0)
|
||||
x->dims = (bench_iodim *)bench_malloc(sizeof(bench_iodim) * rnk);
|
||||
else
|
||||
x->dims = 0;
|
||||
|
||||
x->rnk = rnk;
|
||||
return x;
|
||||
}
|
||||
|
||||
/* Free a tensor together with its dimension array, if it has one. */
void tensor_destroy(bench_tensor *sz)
{
     if (sz->dims)
	  bench_free(sz->dims);
     bench_free(sz);
}
|
||||
|
||||
size_t tensor_sz(const bench_tensor *sz)
|
||||
{
|
||||
int i;
|
||||
size_t n = 1;
|
||||
|
||||
if (!BENCH_FINITE_RNK(sz->rnk))
|
||||
return 0;
|
||||
|
||||
for (i = 0; i < sz->rnk; ++i)
|
||||
n *= sz->dims[i].n;
|
||||
return n;
|
||||
}
|
||||
|
||||
|
||||
/* total order among bench_iodim's */
|
||||
static int dimcmp(const bench_iodim *a, const bench_iodim *b)
|
||||
{
|
||||
if (b->is != a->is)
|
||||
return (b->is - a->is); /* shorter strides go later */
|
||||
if (b->os != a->os)
|
||||
return (b->os - a->os); /* shorter strides go later */
|
||||
return (int)(a->n - b->n); /* larger n's go later */
|
||||
}
|
||||
|
||||
bench_tensor *tensor_compress(const bench_tensor *sz)
|
||||
{
|
||||
int i, rnk;
|
||||
bench_tensor *x;
|
||||
|
||||
BENCH_ASSERT(BENCH_FINITE_RNK(sz->rnk));
|
||||
for (i = rnk = 0; i < sz->rnk; ++i) {
|
||||
BENCH_ASSERT(sz->dims[i].n > 0);
|
||||
if (sz->dims[i].n != 1)
|
||||
++rnk;
|
||||
}
|
||||
|
||||
x = mktensor(rnk);
|
||||
for (i = rnk = 0; i < sz->rnk; ++i) {
|
||||
if (sz->dims[i].n != 1)
|
||||
x->dims[rnk++] = sz->dims[i];
|
||||
}
|
||||
|
||||
if (rnk) {
|
||||
/* God knows how qsort() behaves if n==0 */
|
||||
qsort(x->dims, (size_t)x->rnk, sizeof(bench_iodim),
|
||||
(int (*)(const void *, const void *))dimcmp);
|
||||
}
|
||||
|
||||
return x;
|
||||
}
|
||||
|
||||
/*
 * Nonzero iff T has rank 0 or its last dimension has both input and
 * output stride equal to 1.
 */
int tensor_unitstridep(bench_tensor *t)
{
     const bench_iodim *last;

     BENCH_ASSERT(BENCH_FINITE_RNK(t->rnk));

     if (t->rnk == 0)
	  return 1;

     last = &t->dims[t->rnk - 1];
     return (last->is == 1 && last->os == 1);
}
|
||||
|
||||
/* detect screwy real padded rowmajor... ugh */
|
||||
int tensor_real_rowmajorp(bench_tensor *t, int sign, int in_place)
{
     int last, i;

     BENCH_ASSERT(BENCH_FINITE_RNK(t->rnk));

     last = t->rnk - 1;

     /* The pair (last - 1, last) is special: one side counts n/2 + 1
	elements along the last dimension, the other side counts n
	elements (or 2*(n/2 + 1) when in place).  Which side is which
	depends on the sign. */
     if (last >= 1) {
	  const bench_iodim *outer = t->dims + (last - 1);
	  const bench_iodim *inner = outer + 1;
	  int half = inner->n/2 + 1;
	  int full = in_place ? 2*half : inner->n;
	  int in_count = (sign < 0) ? full : half;
	  int out_count = (sign < 0) ? half : full;

	  if (outer->is != inner->is * in_count)
	       return 0;
	  if (outer->os != inner->os * out_count)
	       return 0;
     }

     /* all remaining outer dimensions must be plain row-major */
     for (i = last - 2; i >= 0; --i) {
	  const bench_iodim *outer = t->dims + i;
	  const bench_iodim *inner = outer + 1;

	  if (outer->is != inner->is * inner->n)
	       return 0;
	  if (outer->os != inner->os * inner->n)
	       return 0;
     }
     return 1;
}
|
||||
|
||||
/*
 * Nonzero iff, for every pair of adjacent dimensions, the strides of
 * the outer one equal stride * extent of the inner one, for both the
 * input and the output strides.
 */
int tensor_rowmajorp(bench_tensor *t)
{
     int i;

     BENCH_ASSERT(BENCH_FINITE_RNK(t->rnk));

     for (i = t->rnk - 2; i >= 0; --i) {
	  const bench_iodim *outer = t->dims + i;
	  const bench_iodim *inner = outer + 1;

	  if (outer->is != inner->is * inner->n ||
	      outer->os != inner->os * inner->n)
	       return 0;
     }
     return 1;
}
|
||||
|
||||
/* Copy RNK dimensions from SRC to DST; a non-finite rank copies nothing. */
static void dimcpy(bench_iodim *dst, const bench_iodim *src, int rnk)
{
     int dim;

     if (!BENCH_FINITE_RNK(rnk))
	  return;

     dim = 0;
     while (dim < rnk) {
	  dst[dim] = src[dim];
	  ++dim;
     }
}
|
||||
|
||||
bench_tensor *tensor_append(const bench_tensor *a, const bench_tensor *b)
|
||||
{
|
||||
if (!BENCH_FINITE_RNK(a->rnk) || !BENCH_FINITE_RNK(b->rnk)) {
|
||||
return mktensor(BENCH_RNK_MINFTY);
|
||||
} else {
|
||||
bench_tensor *x = mktensor(a->rnk + b->rnk);
|
||||
dimcpy(x->dims, a->dims, a->rnk);
|
||||
dimcpy(x->dims + a->rnk, b->dims, b->rnk);
|
||||
return x;
|
||||
}
|
||||
}
|
||||
|
||||
/* Larger of two ints. */
static int imax(int a, int b)
{
     if (a > b)
	  return a;
     return b;
}
|
||||
|
||||
/* Smaller of two ints. */
static int imin(int a, int b)
{
     if (a < b)
	  return a;
     return b;
}
|
||||
|
||||
#define DEFBOUNDS(name, xs) \
|
||||
void name(bench_tensor *t, int *lbp, int *ubp) \
|
||||
{ \
|
||||
int lb = 0; \
|
||||
int ub = 1; \
|
||||
int i; \
|
||||
\
|
||||
BENCH_ASSERT(BENCH_FINITE_RNK(t->rnk)); \
|
||||
\
|
||||
for (i = 0; i < t->rnk; ++i) { \
|
||||
bench_iodim *d = t->dims + i; \
|
||||
int n = d->n; \
|
||||
int s = d->xs; \
|
||||
lb = imin(lb, lb + s * (n - 1)); \
|
||||
ub = imax(ub, ub + s * (n - 1)); \
|
||||
} \
|
||||
\
|
||||
*lbp = lb; \
|
||||
*ubp = ub; \
|
||||
}
|
||||
|
||||
DEFBOUNDS(tensor_ibounds, is)
|
||||
DEFBOUNDS(tensor_obounds, os)
|
||||
|
||||
bench_tensor *tensor_copy(const bench_tensor *sz)
|
||||
{
|
||||
bench_tensor *x = mktensor(sz->rnk);
|
||||
dimcpy(x->dims, sz->dims, sz->rnk);
|
||||
return x;
|
||||
}
|
||||
|
||||
/* Like tensor_copy, but copy only rnk dimensions starting with start_dim. */
|
||||
bench_tensor *tensor_copy_sub(const bench_tensor *sz, int start_dim, int rnk)
|
||||
{
|
||||
bench_tensor *x;
|
||||
|
||||
BENCH_ASSERT(BENCH_FINITE_RNK(sz->rnk) && start_dim + rnk <= sz->rnk);
|
||||
x = mktensor(rnk);
|
||||
dimcpy(x->dims, sz->dims + start_dim, rnk);
|
||||
return x;
|
||||
}
|
||||
|
||||
bench_tensor *tensor_copy_swapio(const bench_tensor *sz)
|
||||
{
|
||||
bench_tensor *x = tensor_copy(sz);
|
||||
int i;
|
||||
if (BENCH_FINITE_RNK(x->rnk))
|
||||
for (i = 0; i < x->rnk; ++i) {
|
||||
int s;
|
||||
s = x->dims[i].is;
|
||||
x->dims[i].is = x->dims[i].os;
|
||||
x->dims[i].os = s;
|
||||
}
|
||||
return x;
|
||||
}
|
||||
134
fftw-3.3.10/libbench2/timer.c
Normal file
134
fftw-3.3.10/libbench2/timer.c
Normal file
@@ -0,0 +1,134 @@
|
||||
/*
|
||||
* Copyright (c) 2001 Matteo Frigo
|
||||
* Copyright (c) 2001 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include "libbench2/bench.h"
|
||||
#include <stdio.h>
|
||||
|
||||
/*
|
||||
* System-dependent timing functions:
|
||||
*/
|
||||
#ifdef HAVE_SYS_TIME_H
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_BSDGETTIMEOFDAY
|
||||
#ifndef HAVE_GETTIMEOFDAY
|
||||
#define gettimeofday BSDgettimeofday
|
||||
#define HAVE_GETTIMEOFDAY 1
|
||||
#endif
|
||||
#endif
|
||||
|
||||
double time_min;
|
||||
int time_repeat;
|
||||
|
||||
#if !defined(HAVE_TIMER) && (defined(__WIN32__) || defined(_WIN32) || defined(_WINDOWS) || defined(__CYGWIN__))
#include <windows.h>
typedef LARGE_INTEGER mytime;

/* Read the performance counter. */
static mytime get_time(void)
{
     mytime now;
     QueryPerformanceCounter(&now);
     return now;
}

/* Counter difference t1 - t0 divided by the counter frequency. */
static double elapsed(mytime t1, mytime t0)
{
     LARGE_INTEGER freq;
     double ticks;

     QueryPerformanceFrequency(&freq);
     ticks = (double) t1.QuadPart - (double) t0.QuadPart;
     return ticks / ((double) freq.QuadPart);
}

#define HAVE_TIMER
#endif
|
||||
|
||||
|
||||
#if defined(HAVE_GETTIMEOFDAY) && !defined(HAVE_TIMER)
typedef struct timeval mytime;

/* Read the current time with gettimeofday(). */
static mytime get_time(void)
{
     mytime now;
     gettimeofday(&now, 0);
     return now;
}

/* Difference t1 - t0 in seconds. */
static double elapsed(mytime t1, mytime t0)
{
     double whole = (double) t1.tv_sec - (double) t0.tv_sec;
     double micro = (double) t1.tv_usec - (double) t0.tv_usec;

     return whole + micro * 1.0E-6;
}

#define HAVE_TIMER
#endif
|
||||
|
||||
#ifndef HAVE_TIMER
|
||||
#error "timer not defined"
|
||||
#endif
|
||||
|
||||
/*
 * Default minimum measurement time.  There seems to be no reasonable
 * way to calibrate the clock automatically any longer, so a fixed
 * value is returned.
 */
static double calibrate(void)
{
     const double fallback_time_min = 0.01;

     return fallback_time_min;
}
|
||||
|
||||
|
||||
void timer_init(double tmin, int repeat)
|
||||
{
|
||||
static int inited = 0;
|
||||
|
||||
if (inited)
|
||||
return;
|
||||
inited = 1;
|
||||
|
||||
if (!repeat)
|
||||
repeat = 8;
|
||||
time_repeat = repeat;
|
||||
|
||||
if (tmin > 0)
|
||||
time_min = tmin;
|
||||
else
|
||||
time_min = calibrate();
|
||||
}
|
||||
|
||||
static mytime t0[BENCH_NTIMERS];
|
||||
|
||||
void timer_start(int n)
|
||||
{
|
||||
BENCH_ASSERT(n >= 0 && n < BENCH_NTIMERS);
|
||||
t0[n] = get_time();
|
||||
}
|
||||
|
||||
double timer_stop(int n)
|
||||
{
|
||||
mytime t1;
|
||||
BENCH_ASSERT(n >= 0 && n < BENCH_NTIMERS);
|
||||
t1 = get_time();
|
||||
return elapsed(t1, t0[n]);
|
||||
}
|
||||
|
||||
29
fftw-3.3.10/libbench2/useropt.c
Normal file
29
fftw-3.3.10/libbench2/useropt.c
Normal file
@@ -0,0 +1,29 @@
|
||||
/*
|
||||
* Copyright (c) 2000 Matteo Frigo
|
||||
* Copyright (c) 2000 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "libbench2/bench.h"
|
||||
|
||||
/*
 * Handler for user options in this file: none are recognized, so
 * ARG is reported on the error stream and otherwise ignored.
 */
void useropt(const char *arg)
{
     ovtpvt_err("unknown user option: %s.  Ignoring.\n",
		arg);
}
|
||||
234
fftw-3.3.10/libbench2/util.c
Normal file
234
fftw-3.3.10/libbench2/util.c
Normal file
@@ -0,0 +1,234 @@
|
||||
/*
|
||||
* Copyright (c) 2000 Matteo Frigo
|
||||
* Copyright (c) 2000 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
#include "libbench2/bench.h"
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <stddef.h>
|
||||
#include <math.h>
|
||||
|
||||
#if defined(HAVE_MALLOC_H)
|
||||
# include <malloc.h>
|
||||
#endif
|
||||
|
||||
#if defined(HAVE_DECL_MEMALIGN) && !HAVE_DECL_MEMALIGN
|
||||
extern void *memalign(size_t, size_t);
|
||||
#endif
|
||||
|
||||
#if defined(HAVE_DECL_POSIX_MEMALIGN) && !HAVE_DECL_POSIX_MEMALIGN
|
||||
extern int posix_memalign(void **, size_t, size_t);
|
||||
#endif
|
||||
|
||||
/*
 * Report a failed assertion (text S, at FILE:LINE) on the error
 * stream, then call bench_exit() with EXIT_FAILURE.
 */
void bench_assertion_failed(const char *s, int line, const char *file)
{
     ovtpvt_err("bench: %s:%d: assertion failed: %s\n",
		file, line, s);

     bench_exit(EXIT_FAILURE);
}
|
||||
|
||||
#ifdef HAVE_DRAND48
#  if defined(HAVE_DECL_DRAND48) && !HAVE_DECL_DRAND48
extern double drand48(void);
#  endif
/* Random double: drand48() shifted down by 0.5. */
double bench_drand(void)
{
     double r = drand48();
     return r - 0.5;
}
#  if defined(HAVE_DECL_SRAND48) && !HAVE_DECL_SRAND48
extern void srand48(long);
#  endif
/* Seed the generator used by bench_drand(). */
void bench_srand(int seed)
{
     srand48(seed);
}
#else
/* Random double: rand() scaled by RAND_MAX and shifted down by 0.5. */
double bench_drand(void)
{
     return ((double) rand() / (double) RAND_MAX) - 0.5;
}
/* Seed the generator used by bench_drand(). */
void bench_srand(int seed)
{
     srand(seed);
}
#endif
|
||||
|
||||
/**********************************************************
|
||||
* DEBUGGING CODE
|
||||
**********************************************************/
|
||||
#ifdef BENCH_DEBUG
|
||||
static int bench_malloc_cnt = 0;
|
||||
|
||||
/*
|
||||
* debugging malloc/free. Initialize every malloced and freed area to
|
||||
* random values, just to make sure we are not using uninitialized
|
||||
* pointers. Also check for writes past the ends of allocated blocks,
|
||||
* and a couple of other things.
|
||||
*
|
||||
* This code is a quick and dirty hack -- use at your own risk.
|
||||
*/
|
||||
|
||||
static int bench_malloc_total = 0, bench_malloc_max = 0, bench_malloc_cnt_max = 0;
|
||||
|
||||
#define MAGIC ((size_t)0xABadCafe)
|
||||
#define PAD_FACTOR 2
|
||||
#define TWO_SIZE_T (2 * sizeof(size_t))
|
||||
|
||||
#define VERBOSE_ALLOCATION 0
|
||||
|
||||
#if VERBOSE_ALLOCATION
|
||||
#define WHEN_VERBOSE(a) a
|
||||
#else
|
||||
#define WHEN_VERBOSE(a)
|
||||
#endif
|
||||
|
||||
void *bench_malloc(size_t n)
|
||||
{
|
||||
char *p;
|
||||
size_t i;
|
||||
|
||||
bench_malloc_total += n;
|
||||
|
||||
if (bench_malloc_total > bench_malloc_max)
|
||||
bench_malloc_max = bench_malloc_total;
|
||||
|
||||
p = (char *) malloc(PAD_FACTOR * n + TWO_SIZE_T);
|
||||
BENCH_ASSERT(p);
|
||||
|
||||
/* store the size in a known position */
|
||||
((size_t *) p)[0] = n;
|
||||
((size_t *) p)[1] = MAGIC;
|
||||
for (i = 0; i < PAD_FACTOR * n; i++)
|
||||
p[i + TWO_SIZE_T] = (char) (i ^ 0xDEADBEEF);
|
||||
|
||||
++bench_malloc_cnt;
|
||||
|
||||
if (bench_malloc_cnt > bench_malloc_cnt_max)
|
||||
bench_malloc_cnt_max = bench_malloc_cnt;
|
||||
|
||||
/* skip the size we stored previously */
|
||||
return (void *) (p + TWO_SIZE_T);
|
||||
}
|
||||
|
||||
void bench_free(void *p)
|
||||
{
|
||||
char *q;
|
||||
|
||||
BENCH_ASSERT(p);
|
||||
|
||||
q = ((char *) p) - TWO_SIZE_T;
|
||||
BENCH_ASSERT(q);
|
||||
|
||||
{
|
||||
size_t n = ((size_t *) q)[0];
|
||||
size_t magic = ((size_t *) q)[1];
|
||||
size_t i;
|
||||
|
||||
((size_t *) q)[0] = 0; /* set to zero to detect duplicate free's */
|
||||
|
||||
BENCH_ASSERT(magic == MAGIC);
|
||||
((size_t *) q)[1] = ~MAGIC;
|
||||
|
||||
bench_malloc_total -= n;
|
||||
BENCH_ASSERT(bench_malloc_total >= 0);
|
||||
|
||||
/* check for writing past end of array: */
|
||||
for (i = n; i < PAD_FACTOR * n; ++i)
|
||||
if (q[i + TWO_SIZE_T] != (char) (i ^ 0xDEADBEEF)) {
|
||||
BENCH_ASSERT(0 /* array bounds overwritten */);
|
||||
}
|
||||
for (i = 0; i < PAD_FACTOR * n; ++i)
|
||||
q[i + TWO_SIZE_T] = (char) (i ^ 0xBEEFDEAD);
|
||||
|
||||
--bench_malloc_cnt;
|
||||
|
||||
BENCH_ASSERT(bench_malloc_cnt >= 0);
|
||||
|
||||
BENCH_ASSERT(
|
||||
(bench_malloc_cnt == 0 && bench_malloc_total == 0) ||
|
||||
(bench_malloc_cnt > 0 && bench_malloc_total > 0));
|
||||
|
||||
free(q);
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
/**********************************************************
|
||||
* NON DEBUGGING CODE
|
||||
**********************************************************/
|
||||
/* production version, no hacks */
|
||||
|
||||
#define MIN_ALIGNMENT 128 /* must be power of two */
|
||||
|
||||
#define real_free free /* memalign and malloc use ordinary free */
|
||||
|
||||
/*
 * Production allocator.  A request of 0 bytes is treated as 1 byte.
 * The allocation primitive is selected at compile time; all but the
 * plain malloc() fallback request MIN_ALIGNMENT-aligned memory.
 * A failed allocation trips BENCH_ASSERT.
 */
void *bench_malloc(size_t n)
{
     void *p;

     if (n == 0)
	  n = 1;

#if defined(WITH_OUR_MALLOC)
     /* Our own aligned malloc/free.  Assumes sizeof(void*) is
	a power of two <= 8 and that malloc is at least
	sizeof(void*)-aligned.  Assumes size_t = uintptr_t. */
     {
	  void *raw = malloc(n + MIN_ALIGNMENT);

	  p = (void *) 0;
	  if (raw) {
	       /* round up to the next aligned address, then stash the
		  malloc() pointer just below it for bench_free() */
	       p = (void *) (((size_t) raw + MIN_ALIGNMENT) & (~((size_t) (MIN_ALIGNMENT - 1))));
	       *((void **) p - 1) = raw;
	  }
     }
#elif defined(HAVE_MEMALIGN)
     p = memalign(MIN_ALIGNMENT, n);
#elif defined(HAVE_POSIX_MEMALIGN)
     /* note: posix_memalign is broken in glibc 2.2.5: it constrains
	the size, not the alignment, to be (power of two) * sizeof(void*).
	The bug seems to have been fixed as of glibc 2.3.1. */
     if (posix_memalign(&p, MIN_ALIGNMENT, n))
	  p = (void*) 0;
#elif defined(__ICC) || defined(__INTEL_COMPILER) || defined(HAVE__MM_MALLOC)
     /* Intel's C compiler defines _mm_malloc and _mm_free intrinsics */
     p = (void *) _mm_malloc(n, MIN_ALIGNMENT);
#    undef real_free
#    define real_free _mm_free
#else
     p = malloc(n);
#endif

     BENCH_ASSERT(p);
     return p;
}
|
||||
|
||||
/*
 * Production free.  With WITH_OUR_MALLOC the pointer handed to
 * free() is the one stored just below P by bench_malloc(), and a
 * null P is ignored; otherwise P goes straight to real_free.
 */
void bench_free(void *p)
{
#ifdef WITH_OUR_MALLOC
     if (p) {
	  void **slot = (void **) p - 1;
	  free(*slot);
     }
#else
     real_free(p);
#endif
}
|
||||
|
||||
#endif
|
||||
|
||||
/* Like bench_free(), but a null pointer is silently ignored. */
void bench_free0(void *p)
{
     if (!p)
	  return;
     bench_free(p);
}
|
||||
177
fftw-3.3.10/libbench2/verify-dft.c
Normal file
177
fftw-3.3.10/libbench2/verify-dft.c
Normal file
@@ -0,0 +1,177 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include "verify.h"
|
||||
|
||||
/* copy A into B, using output stride of A and input stride of B */
|
||||
typedef struct {
|
||||
dotens2_closure k;
|
||||
R *ra; R *ia;
|
||||
R *rb; R *ib;
|
||||
int scalea, scaleb;
|
||||
} cpy_closure;
|
||||
|
||||
static void cpy0(dotens2_closure *k_,
|
||||
int indxa, int ondxa, int indxb, int ondxb)
|
||||
{
|
||||
cpy_closure *k = (cpy_closure *)k_;
|
||||
k->rb[indxb * k->scaleb] = k->ra[ondxa * k->scalea];
|
||||
k->ib[indxb * k->scaleb] = k->ia[ondxa * k->scalea];
|
||||
UNUSED(indxa); UNUSED(ondxb);
|
||||
}
|
||||
|
||||
static void cpy(R *ra, R *ia, const bench_tensor *sza, int scalea,
|
||||
R *rb, R *ib, const bench_tensor *szb, int scaleb)
|
||||
{
|
||||
cpy_closure k;
|
||||
k.k.apply = cpy0;
|
||||
k.ra = ra; k.ia = ia; k.rb = rb; k.ib = ib;
|
||||
k.scalea = scalea; k.scaleb = scaleb;
|
||||
bench_dotens2(sza, szb, &k.k);
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
dofft_closure k;
|
||||
bench_problem *p;
|
||||
} dofft_dft_closure;
|
||||
|
||||
/* Apply callback for the generic testers: load packed input IN into the
   problem's buffers, run the transform once, and gather the result into
   packed output OUT. */
static void dft_apply(dofft_closure *k_, bench_complex *in, bench_complex *out)
{
    dofft_dft_closure *self = (dofft_dft_closure *)k_;
    bench_problem *prob = self->p;
    bench_tensor *full, *packed;
    bench_tensor *full_swapped, *packed_swapped;
    bench_real *re_in, *im_in, *re_out, *im_out;
    int scale;

    full = tensor_append(prob->vecsz, prob->sz);
    packed = verify_pack(full, 2);
    re_in = (bench_real *) prob->in;
    re_out = (bench_real *) prob->out;

    full_swapped = tensor_copy_swapio(full);
    packed_swapped = tensor_copy_swapio(packed);

    /* The stride counts complex elements in interleaved format but real
       elements in split format, hence the different index multiplier. */
    if (!prob->split) {
        im_in = prob->ini ? (bench_real *) prob->ini : re_in + 1;
        im_out = prob->outi ? (bench_real *) prob->outi : re_out + 1;
        scale = 2;
    } else {
        im_in = prob->ini ? (bench_real *) prob->ini : re_in + prob->iphyssz;
        im_out = prob->outi ? (bench_real *) prob->outi : re_out + prob->ophyssz;
        scale = 1;
    }

    /* packed IN -> problem input buffers */
    cpy(&c_re(in[0]), &c_im(in[0]), packed, 1,
        re_in, im_in, full, scale);
    after_problem_ccopy_from(prob, re_in, im_in);

    doit(1, prob);

    after_problem_ccopy_to(prob, re_out, im_out);

    /* on request, read the problem's input buffers back into IN */
    if (self->k.recopy_input)
        cpy(re_in, im_in, full_swapped, scale,
            &c_re(in[0]), &c_im(in[0]), packed_swapped, 1);

    /* problem output buffers -> packed OUT */
    cpy(re_out, im_out, full, scale,
        &c_re(out[0]), &c_im(out[0]), packed, 1);

    tensor_destroy(full);
    tensor_destroy(packed);
    tensor_destroy(full_swapped);
    tensor_destroy(packed_swapped);
}
|
||||
|
||||
/* Run the impulse, linearity and time/frequency-shift checks on complex
   problem P and store the worst errors in E->i, E->l and E->s.
   ROUNDS == 0 selects the default of 20 rounds. */
void verify_dft(bench_problem *p, int rounds, double tol, errors *e)
{
    C *inA, *inB, *inC, *outA, *outB, *outC, *tmp;
    int n, vecn, N;
    double time_err, freq_err;
    dofft_dft_closure closure;

    BENCH_ASSERT(p->kind == PROBLEM_COMPLEX);

    closure.k.apply = dft_apply;
    closure.k.recopy_input = 0;
    closure.p = p;

    if (rounds == 0)
        rounds = 20; /* default value */

    n = tensor_sz(p->sz);
    vecn = tensor_sz(p->vecsz);
    N = n * vecn;

    inA = (C *) bench_malloc(N * sizeof(C));
    inB = (C *) bench_malloc(N * sizeof(C));
    inC = (C *) bench_malloc(N * sizeof(C));
    outA = (C *) bench_malloc(N * sizeof(C));
    outB = (C *) bench_malloc(N * sizeof(C));
    outC = (C *) bench_malloc(N * sizeof(C));
    tmp = (C *) bench_malloc(N * sizeof(C));

    e->i = impulse(&closure.k, n, vecn, inA, inB, inC, outA, outB, outC,
                   tmp, rounds, tol);
    e->l = linear(&closure.k, 0, N, inA, inB, inC, outA, outB, outC,
                  tmp, rounds, tol);

    /* time shift runs before frequency shift; both draw random inputs */
    time_err = tf_shift(&closure.k, 0, p->sz, n, vecn, p->sign,
                        inA, inB, outA, outB,
                        tmp, rounds, tol, TIME_SHIFT);
    freq_err = tf_shift(&closure.k, 0, p->sz, n, vecn, p->sign,
                        inA, inB, outA, outB,
                        tmp, rounds, tol, FREQ_SHIFT);
    e->s = dmax(dmax(0.0, time_err), freq_err);

    /* only out-of-place plans that do not destroy their input are
       checked for input preservation */
    if (!p->in_place && !p->destroy_input)
        preserves_input(&closure.k, 0, N, inA, inB, outB, rounds);

    bench_free(tmp);
    bench_free(outC);
    bench_free(outB);
    bench_free(outA);
    bench_free(inC);
    bench_free(inB);
    bench_free(inA);
}
|
||||
|
||||
|
||||
/* Measure the accuracy of a rank-1, non-vector complex problem P and
   leave the six statistics produced by accuracy_test in T. */
void accuracy_dft(bench_problem *p, int rounds, int impulse_rounds,
                  double t[6])
{
    dofft_dft_closure closure;
    C *in, *out;
    int len;

    BENCH_ASSERT(p->kind == PROBLEM_COMPLEX);
    BENCH_ASSERT(p->sz->rnk == 1);
    BENCH_ASSERT(p->vecsz->rnk == 0);

    closure.k.apply = dft_apply;
    closure.k.recopy_input = 0;
    closure.p = p;

    len = tensor_sz(p->sz);
    in = (C *) bench_malloc(len * sizeof(C));
    out = (C *) bench_malloc(len * sizeof(C));

    accuracy_test(&closure.k, 0, p->sign, len, in, out,
                  rounds, impulse_rounds, t);

    bench_free(out);
    bench_free(in);
}
|
||||
545
fftw-3.3.10/libbench2/verify-lib.c
Normal file
545
fftw-3.3.10/libbench2/verify-lib.c
Normal file
@@ -0,0 +1,545 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include "verify.h"
|
||||
#include <math.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
/*
|
||||
* Utility functions:
|
||||
*/
|
||||
/* absolute value of X */
static double dabs(double x)
{
    if (x < 0.0)
        return -x;
    return x;
}

/* the smaller of X and Y (Y when the comparison is false) */
static double dmin(double x, double y)
{
    if (x < y)
        return x;
    return y;
}
|
||||
/* max(|x|, |y|): the largest component magnitude of the pair (x, y) */
static double norm2(double x, double y)
{
    const double ax = dabs(x);
    const double ay = dabs(y);

    return dmax(ax, ay);
}
|
||||
|
||||
/* the larger of X and Y (Y when the comparison is false) */
double dmax(double x, double y)
{
    if (x > y)
        return x;
    return y;
}
|
||||
|
||||
/*
 * Relative L-infinity error between complex arrays A and B of length N.
 *
 * The numerator is the largest component-wise difference; the denominator
 * is the largest, over all elements, of the smaller of the two element
 * magnitudes.  Returns 0.0 when N <= 0.
 *
 * When both the difference and the magnitude are exactly zero (both arrays
 * are identically zero) the arrays agree, so 0.0 is returned rather than
 * computing 0/0.  The NaN from 0/0 would abort on builds with HAVE_ISNAN
 * and slip through `d > tol` comparisons on builds without it.  A nonzero
 * difference over a zero magnitude still yields infinity, as before.
 */
static double aerror(C *a, C *b, int n)
{
    double e = 0.0, mag = 0.0;
    int i;

    if (n <= 0)
        return 0.0;

    for (i = 0; i < n; ++i) {
        e = dmax(e, norm2(c_re(a[i]) - c_re(b[i]),
                          c_im(a[i]) - c_im(b[i])));
        mag = dmax(mag,
                   dmin(norm2(c_re(a[i]), c_im(a[i])),
                        norm2(c_re(b[i]), c_im(b[i]))));
    }

    /* identical all-zero arrays: the error is zero, not 0/0 */
    if (mag == 0.0 && e == 0.0)
        return 0.0;

    e /= mag;

#ifdef HAVE_ISNAN
    BENCH_ASSERT(!isnan(e));
#endif
    return e;
}
|
||||
|
||||
#if defined(HAVE_DRAND48) && defined(HAVE_DECL_DRAND48) && !HAVE_DECL_DRAND48
/* drand48 exists but the system headers do not declare it */
extern double drand48(void);
#endif

/* Pseudo-random double centred on zero: drand48() - 0.5 when drand48 is
   available, otherwise rand() / RAND_MAX - 0.5. */
double mydrand(void)
{
#ifdef HAVE_DRAND48
    return drand48() - 0.5;
#else
    double d = rand();

    return (d / (double) RAND_MAX) - 0.5;
#endif
}
|
||||
|
||||
/* Fill the N complex elements of A with values drawn from mydrand();
   the real part of each element is drawn before its imaginary part. */
void arand(C *a, int n)
{
    int idx = 0;

    while (idx < n) {
        c_re(a[idx]) = mydrand();
        c_im(a[idx]) = mydrand();
        ++idx;
    }
}
|
||||
|
||||
/* make array real */
|
||||
void mkreal(C *A, int n)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < n; ++i) {
|
||||
c_im(A[i]) = 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
/* Recursively store into Ac the complex conjugate of A with every index
   along each of the RANK dimensions negated modulo its length (index 0
   maps to itself).  DIM[rank-1] is the dimension handled at this level
   and STRIDE its element stride. */
static void assign_conj(C *Ac, C *A, int rank, const bench_iodim *dim, int stride)
{
    int i, len, sub_stride;

    if (rank == 0) {
        c_re(*Ac) = c_re(*A);
        c_im(*Ac) = -c_im(*A);
        return;
    }

    len = dim[rank - 1].n;
    sub_stride = stride * len;

    /* index 0 is its own mirror image */
    assign_conj(Ac, A, rank - 1, dim, sub_stride);

    /* index i of A lands at index len - i of Ac */
    for (i = 1; i < len; ++i)
        assign_conj(Ac + (len - i) * stride, A + i * stride,
                    rank - 1, dim, sub_stride);
}
|
||||
|
||||
/* make array hermitian */
|
||||
void mkhermitian(C *A, int rank, const bench_iodim *dim, int stride)
|
||||
{
|
||||
if (rank == 0)
|
||||
c_im(*A) = 0.0;
|
||||
else {
|
||||
int i, n0 = dim[rank - 1].n, s = stride;
|
||||
rank -= 1;
|
||||
stride *= n0;
|
||||
mkhermitian(A, rank, dim, stride);
|
||||
for (i = 1; 2*i < n0; ++i)
|
||||
assign_conj(A + (n0 - i) * s, A + i * s, rank, dim, stride);
|
||||
if (2*i == n0)
|
||||
mkhermitian(A + i * s, rank, dim, stride);
|
||||
}
|
||||
}
|
||||
|
||||
/* One-dimensional convenience wrapper: make the N-element, unit-stride
   array A hermitian. */
void mkhermitian1(C *a, int n)
{
    bench_iodim only_dim;

    only_dim.n = n;
    only_dim.os = 1;
    only_dim.is = only_dim.os;

    mkhermitian(a, 1, &only_dim, 1);
}
|
||||
|
||||
/* C = A */
|
||||
void acopy(C *c, C *a, int n)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < n; ++i) {
|
||||
c_re(c[i]) = c_re(a[i]);
|
||||
c_im(c[i]) = c_im(a[i]);
|
||||
}
|
||||
}
|
||||
|
||||
/* C = A + B */
|
||||
void aadd(C *c, C *a, C *b, int n)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < n; ++i) {
|
||||
c_re(c[i]) = c_re(a[i]) + c_re(b[i]);
|
||||
c_im(c[i]) = c_im(a[i]) + c_im(b[i]);
|
||||
}
|
||||
}
|
||||
|
||||
/* C = A - B */
|
||||
void asub(C *c, C *a, C *b, int n)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < n; ++i) {
|
||||
c_re(c[i]) = c_re(a[i]) - c_re(b[i]);
|
||||
c_im(c[i]) = c_im(a[i]) - c_im(b[i]);
|
||||
}
|
||||
}
|
||||
|
||||
/* B = rotate left A (complex) */
|
||||
void arol(C *b, C *a, int n, int nb, int na)
|
||||
{
|
||||
int i, ib, ia;
|
||||
|
||||
for (ib = 0; ib < nb; ++ib) {
|
||||
for (i = 0; i < n - 1; ++i)
|
||||
for (ia = 0; ia < na; ++ia) {
|
||||
C *pb = b + (ib * n + i) * na + ia;
|
||||
C *pa = a + (ib * n + i + 1) * na + ia;
|
||||
c_re(*pb) = c_re(*pa);
|
||||
c_im(*pb) = c_im(*pa);
|
||||
}
|
||||
|
||||
for (ia = 0; ia < na; ++ia) {
|
||||
C *pb = b + (ib * n + n - 1) * na + ia;
|
||||
C *pa = a + ib * n * na + ia;
|
||||
c_re(*pb) = c_re(*pa);
|
||||
c_im(*pb) = c_im(*pa);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* B[jb][j][ja] = A[jb][j][ja] * (cos(2*pi*j/n) + i * sign * sin(2*pi*j/n)):
   multiply along the middle dimension (length N) of an [nb][n][na] array
   by a linear phase. */
void aphase_shift(C *b, C *a, int n, int nb, int na, double sign)
{
    int j, jb, ja;
    trigreal twopin;

    twopin = K2PI / n;

    for (jb = 0; jb < nb; ++jb) {
        for (j = 0; j < n; ++j) {
            /* twiddle factor shared by the whole inner run */
            trigreal s = sign * SIN(j * twopin);
            trigreal c = COS(j * twopin);
            C *dst = b + (jb * n + j) * na;
            C *src = a + (jb * n + j) * na;

            for (ja = 0; ja < na; ++ja) {
                c_re(dst[ja]) = c_re(src[ja]) * c - c_im(src[ja]) * s;
                c_im(dst[ja]) = c_re(src[ja]) * s + c_im(src[ja]) * c;
            }
        }
    }
}
|
||||
|
||||
/* A = alpha * A (complex, in place) */
|
||||
void ascale(C *a, C alpha, int n)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < n; ++i) {
|
||||
R xr = c_re(a[i]), xi = c_im(a[i]);
|
||||
c_re(a[i]) = xr * c_re(alpha) - xi * c_im(alpha);
|
||||
c_im(a[i]) = xr * c_im(alpha) + xi * c_re(alpha);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Compare complex arrays A and B of length N.  Returns their relative
   error; when it exceeds TOL, reports the error and the array contents
   (at most 300 rows unless verbose > 2) and calls
   bench_exit(EXIT_FAILURE).  TEST names the check in the report. */
double acmp(C *a, C *b, int n, const char *test, double tol)
{
    double d = aerror(a, b, n);
    int i, shown;

    if (!(d > tol))
        return d;

    ovtpvt_err("Found relative error %e (%s)\n", d, test);

    if (n > 300 && verbose <= 2)
        shown = 300;
    else
        shown = n;

    for (i = 0; i < shown; ++i)
        ovtpvt_err("%8d %16.12f %16.12f %16.12f %16.12f\n", i,
                   (double) c_re(a[i]), (double) c_im(a[i]),
                   (double) c_re(b[i]), (double) c_im(b[i]));

    bench_exit(EXIT_FAILURE);
    return d;
}
|
||||
|
||||
|
||||
/*
|
||||
* Implementation of the FFT tester described in
|
||||
*
|
||||
* Funda Ergün. Testing multivariate linear functions: Overcoming the
|
||||
* generator bottleneck. In Proceedings of the Twenty-Seventh Annual
|
||||
* ACM Symposium on the Theory of Computing, pages 407-416, Las Vegas,
|
||||
* Nevada, 29 May--1 June 1995.
|
||||
*
|
||||
* Also: F. Ergun, S. R. Kumar, and D. Sivakumar, "Self-testing without
|
||||
* the generator bottleneck," SIAM J. on Computing 29 (5), 1630-51 (2000).
|
||||
*/
|
||||
|
||||
/* Check that the transform of inA equals outA, first directly and then
   ROUNDS times through linearity: inA is split as inB + (inA - inB) for
   random inB and the two transforms are summed.  Returns the worst
   relative error seen. */
static double impulse0(dofft_closure *k,
                       int n, int vecn,
                       C *inA, C *inB, C *inC,
                       C *outA, C *outB, C *outC,
                       C *tmp, int rounds, double tol)
{
    const int total = n * vecn;
    double worst = 0.0;
    int remaining;

    k->apply(k, inA, tmp);
    worst = dmax(worst, acmp(tmp, outA, total, "impulse 1", tol));

    for (remaining = rounds; remaining > 0; --remaining) {
        arand(inB, total);
        asub(inC, inA, inB, total);
        k->apply(k, inB, outB);
        k->apply(k, inC, outC);
        aadd(tmp, outB, outC, total);
        worst = dmax(worst, acmp(tmp, outA, total, "impulse", tol));
    }

    return worst;
}
|
||||
|
||||
/* Check the transform against two analytically known pairs, for each of
   the VECN vectors of length N: an impulse at index 0 (constant
   spectrum) and a constant input (spectrum nonzero only at index 0).
   Each vector uses a different amplitude.  Returns the worst error. */
double impulse(dofft_closure *k,
               int n, int vecn,
               C *inA, C *inB, C *inC,
               C *outA, C *outB, C *outC,
               C *tmp, int rounds, double tol)
{
    int vec, pos;
    double worst = 0.0;

    /* check impulsive input */
    for (vec = 0; vec < vecn; ++vec) {
        R x = (sqrt(n)*(vec+1)) / (double)(vecn+1);
        C *in_row = inA + vec * n;
        C *out_row = outA + vec * n;

        for (pos = 0; pos < n; ++pos) {
            c_re(in_row[pos]) = 0;
            c_im(in_row[pos]) = 0;
            c_re(out_row[pos]) = x;
            c_im(out_row[pos]) = 0;
        }
        c_re(in_row[0]) = x;
        c_im(in_row[0]) = 0;
    }

    worst = dmax(worst, impulse0(k, n, vecn, inA, inB, inC,
                                 outA, outB, outC, tmp, rounds, tol));

    /* check constant input */
    for (vec = 0; vec < vecn; ++vec) {
        R x = (vec+1) / ((double)(vecn+1) * sqrt(n));
        C *in_row = inA + vec * n;
        C *out_row = outA + vec * n;

        for (pos = 0; pos < n; ++pos) {
            c_re(in_row[pos]) = x;
            c_im(in_row[pos]) = 0;
            c_re(out_row[pos]) = 0;
            c_im(out_row[pos]) = 0;
        }
        c_re(out_row[0]) = n * x;
        c_im(out_row[0]) = 0;
    }

    worst = dmax(worst, impulse0(k, n, vecn, inA, inB, inC,
                                 outA, outB, outC, tmp, rounds, tol));
    return worst;
}
|
||||
|
||||
/* Linearity check, repeated ROUNDS times: for random inputs A, B and
   random scalars alpha, beta, the transform of alpha*A + beta*B must
   equal alpha*T(A) + beta*T(B).  With REALP nonzero the scalars are
   real.  Returns the worst relative error. */
double linear(dofft_closure *k, int realp,
              int n, C *inA, C *inB, C *inC, C *outA,
              C *outB, C *outC, C *tmp, int rounds, double tol)
{
    double worst = 0.0;
    int remaining;

    for (remaining = rounds; remaining > 0; --remaining) {
        C alpha, beta;

        /* draw order: re(alpha), im(alpha), re(beta), im(beta) */
        c_re(alpha) = mydrand();
        if (realp)
            c_im(alpha) = 0.0;
        else
            c_im(alpha) = mydrand();
        c_re(beta) = mydrand();
        if (realp)
            c_im(beta) = 0.0;
        else
            c_im(beta) = mydrand();

        arand(inA, n);
        arand(inB, n);
        k->apply(k, inA, outA);
        k->apply(k, inB, outB);

        /* tmp = alpha * T(A) + beta * T(B) */
        ascale(outA, alpha, n);
        ascale(outB, beta, n);
        aadd(tmp, outA, outB, n);

        /* outC = T(alpha * A + beta * B) */
        ascale(inA, alpha, n);
        ascale(inB, beta, n);
        aadd(inC, inA, inB, n);
        k->apply(k, inC, outC);

        worst = dmax(worst, acmp(outC, tmp, n, "linear", tol));
    }

    return worst;
}
|
||||
|
||||
|
||||
|
||||
/* Shift-property check along every dimension of SZ, ROUNDS times each.
   TIME_SHIFT: rotating the input must match a phase shift of the output.
   Otherwise: phase-shifting the input must match a rotation of the
   output.  With REALP nonzero the random input is first made real
   (time shift) or hermitian (frequency shift).  Returns the worst error. */
double tf_shift(dofft_closure *k,
                int realp, const bench_tensor *sz,
                int n, int vecn, double sign,
                C *inA, C *inB, C *outA, C *outB, C *tmp,
                int rounds, double tol, int which_shift)
{
    const int total = n * vecn;
    int outer = 1;      /* product of the dimensions already visited */
    int inner = n;      /* product of the dimensions not yet visited */
    int dim, round, vec;
    double worst = 0.0;

    /* test 3: check the time-shift property */
    /* the paper performs more tests, but this code should be fine too */

    /* check shifts across all SZ dimensions */
    for (dim = 0; dim < sz->rnk; ++dim) {
        const int len = sz->dims[dim].n;

        inner /= len;

        for (round = 0; round < rounds; ++round) {
            arand(inA, total);

            if (which_shift != TIME_SHIFT) {
                for (vec = 0; vec < vecn; ++vec) {
                    if (realp)
                        mkhermitian(inA + vec * n, sz->rnk, sz->dims, 1);
                    aphase_shift(inB + vec * n, inA + vec * n, len,
                                 outer, inner, -sign);
                }
                k->apply(k, inA, outA);
                k->apply(k, inB, outB);
                for (vec = 0; vec < vecn; ++vec)
                    arol(tmp + vec * n, outB + vec * n, len, outer, inner);
                worst = dmax(worst,
                             acmp(tmp, outA, total, "freq shift", tol));
            } else {
                for (vec = 0; vec < vecn; ++vec) {
                    if (realp)
                        mkreal(inA + vec * n, n);
                    arol(inB + vec * n, inA + vec * n, len, outer, inner);
                }
                k->apply(k, inA, outA);
                k->apply(k, inB, outB);
                for (vec = 0; vec < vecn; ++vec)
                    aphase_shift(tmp + vec * n, outB + vec * n, len,
                                 outer, inner, sign);
                worst = dmax(worst,
                             acmp(tmp, outA, total, "time shift", tol));
            }
        }

        outer *= len;
    }

    return worst;
}
|
||||
|
||||
|
||||
/* Check, ROUNDS times, that applying the transform leaves its input
   untouched: the input read back after the transform must equal the
   original exactly (tolerance 0.0).  CONSTRAIN, when non-null, is applied
   to each random input first.  The closure's recopy_input flag is forced
   on for the duration and restored afterwards. */
void preserves_input(dofft_closure *k, aconstrain constrain,
                     int n, C *inA, C *inB, C *outB, int rounds)
{
    const int saved_flag = k->recopy_input;
    int remaining;

    k->recopy_input = 1;

    for (remaining = rounds; remaining > 0; --remaining) {
        arand(inA, n);
        if (constrain)
            constrain(inA, n);

        acopy(inB, inA, n);
        k->apply(k, inB, outB);
        acmp(inB, inA, n, "preserves_input", 0.0);
    }

    k->recopy_input = saved_flag;
}
|
||||
|
||||
|
||||
/* Make a copy of the size tensor, with the same dimensions, but with
|
||||
the strides corresponding to a "packed" row-major array with the
|
||||
given stride. */
|
||||
bench_tensor *verify_pack(const bench_tensor *sz, int s)
|
||||
{
|
||||
bench_tensor *x = tensor_copy(sz);
|
||||
if (BENCH_FINITE_RNK(x->rnk) && x->rnk > 0) {
|
||||
int i;
|
||||
x->dims[x->rnk - 1].is = s;
|
||||
x->dims[x->rnk - 1].os = s;
|
||||
for (i = x->rnk - 1; i > 0; --i) {
|
||||
x->dims[i - 1].is = x->dims[i].is * x->dims[i].n;
|
||||
x->dims[i - 1].os = x->dims[i].os * x->dims[i].n;
|
||||
}
|
||||
}
|
||||
return x;
|
||||
}
|
||||
|
||||
/* Return 1 when every real and imaginary part of the N elements of A is
   zero, 0 otherwise. */
static int all_zero(C *a, int n)
{
    int idx = 0;

    while (idx < n) {
        if (c_re(a[idx]) != 0.0 || c_im(a[idx]) != 0.0)
            return 0;
        ++idx;
    }
    return 1;
}
|
||||
|
||||
/* Run one accuracy measurement on input A (after applying CONSTRAIN, if
   non-null) and fold the six error figures from fftaccuracy into T:
   slots 0 and 3 accumulate sums, 1 and 4 sums of squares, 2 and 5 maxima.
   Returns 0 without touching T when the input is all zero, 1 otherwise. */
static int one_accuracy_test(dofft_closure *k, aconstrain constrain,
                             int sign, int n, C *a, C *b,
                             double t[6])
{
    double err[6];
    int group;

    if (constrain)
        constrain(a, n);

    if (all_zero(a, n))
        return 0;

    k->apply(k, a, b);
    fftaccuracy(n, a, b, sign, err);

    /* two groups of three: (sum, sum of squares, maximum) */
    for (group = 0; group < 6; group += 3) {
        t[group] += err[group];
        t[group + 1] += err[group + 1] * err[group + 1];
        t[group + 2] = dmax(t[group + 2], err[group + 2]);
    }

    return 1;
}
|
||||
|
||||
/*
 * Accumulate accuracy statistics for the transform behind K into T.
 *
 * Inputs tried: ROUNDS random arrays, then up to IMPULSE_ROUNDS unit
 * impulses at the beginning of the array, at the end of the array, and
 * at random positions.  CONSTRAIN, when non-null, is applied to every
 * input by one_accuracy_test; inputs that end up all zero are not counted.
 *
 * On return t[0] and t[3] are means, t[1] and t[4] root-mean-squares, and
 * t[2] and t[5] maxima of the error figures produced by fftaccuracy.
 */
void accuracy_test(dofft_closure *k, aconstrain constrain,
                   int sign, int n, C *a, C *b, int rounds, int impulse_rounds,
                   double t[6])
{
    int r, i;
    int ntests = 0;
    bench_complex czero = {0, 0};

    for (i = 0; i < 6; ++i) t[i] = 0.0;

    for (r = 0; r < rounds; ++r) {
        arand(a, n);
        if (one_accuracy_test(k, constrain, sign, n, a, b, t))
            ++ntests;
    }

    /* impulses at beginning of array: index r, first half (including the
       middle element) only */
    for (r = 0; r < impulse_rounds; ++r) {
        if (r > n - r - 1)
            continue;

        caset(a, n, czero);
        c_re(a[r]) = c_im(a[r]) = 1.0;

        if (one_accuracy_test(k, constrain, sign, n, a, b, t))
            ++ntests;
    }

    /* impulses at end of array: index n - r - 1, kept strictly in the
       second half.  The skip test used to be `r <= n - r - 1`, which
       never placed an impulse near the end, repeated first-half indices
       and wrote to a negative index once r >= n.  Skipping when
       r >= n - r - 1 guarantees 0 <= r < n - r - 1 <= n - 1. */
    for (r = 0; r < impulse_rounds; ++r) {
        if (r >= n - r - 1)
            continue;

        caset(a, n, czero);
        c_re(a[n - r - 1]) = c_im(a[n - r - 1]) = 1.0;

        if (one_accuracy_test(k, constrain, sign, n, a, b, t))
            ++ntests;
    }

    /* randomly-located impulses; skipped for an empty array because
       rand() % 0 is undefined */
    for (r = 0; n > 0 && r < impulse_rounds; ++r) {
        caset(a, n, czero);
        i = rand() % n;
        c_re(a[i]) = c_im(a[i]) = 1.0;

        if (one_accuracy_test(k, constrain, sign, n, a, b, t))
            ++ntests;
    }

    /* turn sums into means and sums of squares into RMS values */
    t[0] /= ntests;
    t[1] = sqrt(t[1] / ntests);
    t[3] /= ntests;
    t[4] = sqrt(t[4] / ntests);

    fftaccuracy_done();
}
|
||||
964
fftw-3.3.10/libbench2/verify-r2r.c
Normal file
964
fftw-3.3.10/libbench2/verify-r2r.c
Normal file
@@ -0,0 +1,964 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
/* Lots of ugly duplication from verify-lib.c, plus lots of ugliness in
|
||||
general for all of the r2r variants...oh well, for now */
|
||||
|
||||
#include "verify.h"
|
||||
#include <math.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
typedef struct {
|
||||
bench_problem *p;
|
||||
bench_tensor *probsz;
|
||||
bench_tensor *totalsz;
|
||||
bench_tensor *pckdsz;
|
||||
bench_tensor *pckdvecsz;
|
||||
} info;
|
||||
|
||||
/*
|
||||
* Utility functions:
|
||||
*/
|
||||
|
||||
/* absolute value of X */
static double dabs(double x)
{
    if (x < 0.0)
        return -x;
    return x;
}

/* the smaller of X and Y (Y when the comparison is false) */
static double dmin(double x, double y)
{
    if (x < y)
        return x;
    return y;
}
|
||||
|
||||
/* Relative L-infinity error between real arrays A and B of length N:
   the largest absolute difference divided by the largest, over all
   elements, of the smaller of the two element magnitudes.  Returns 0.0
   when N <= 0, and also when both quantities are below 1e-14. */
static double raerror(R *a, R *b, int n)
{
    double err = 0.0, scale = 0.0;
    int idx;

    if (n <= 0)
        return 0.0;

    for (idx = 0; idx < n; ++idx) {
        err = dmax(err, dabs(a[idx] - b[idx]));
        scale = dmax(scale, dmin(dabs(a[idx]), dabs(b[idx])));
    }

    /* two essentially-zero arrays agree; avoid dividing tiny by tiny */
    if (!(dabs(scale) < 1e-14 && dabs(err) < 1e-14))
        err /= scale;
    else
        err = 0.0;

#ifdef HAVE_ISNAN
    BENCH_ASSERT(!isnan(err));
#endif
    return err;
}
|
||||
|
||||
#define by2pi(m, n) ((K2PI * (m)) / (n))
|
||||
|
||||
/*
|
||||
* Improve accuracy by reducing x to range [0..1/8]
|
||||
* before multiplication by 2 * PI.
|
||||
*/
|
||||
|
||||
static trigreal bench_sincos(trigreal m, trigreal n, int sinp)
|
||||
{
|
||||
/* waiting for C to get tail recursion... */
|
||||
trigreal half_n = n * 0.5;
|
||||
trigreal quarter_n = half_n * 0.5;
|
||||
trigreal eighth_n = quarter_n * 0.5;
|
||||
trigreal sgn = 1.0;
|
||||
|
||||
if (sinp) goto sin;
|
||||
cos:
|
||||
if (m < 0) { m = -m; /* goto cos; */ }
|
||||
if (m > half_n) { m = n - m; goto cos; }
|
||||
if (m > eighth_n) { m = quarter_n - m; goto sin; }
|
||||
return sgn * COS(by2pi(m, n));
|
||||
|
||||
msin:
|
||||
sgn = -sgn;
|
||||
sin:
|
||||
if (m < 0) { m = -m; goto msin; }
|
||||
if (m > half_n) { m = n - m; goto msin; }
|
||||
if (m > eighth_n) { m = quarter_n - m; goto cos; }
|
||||
return sgn * SIN(by2pi(m, n));
|
||||
}
|
||||
|
||||
static trigreal cos2pi(int m, int n)
|
||||
{
|
||||
return bench_sincos((trigreal)m, (trigreal)n, 0);
|
||||
}
|
||||
|
||||
static trigreal sin2pi(int m, int n)
|
||||
{
|
||||
return bench_sincos((trigreal)m, (trigreal)n, 1);
|
||||
}
|
||||
|
||||
/* cos(2*pi*i*j/n) */
static trigreal cos00(int i, int j, int n)
{
    const int prod = i * j;

    return cos2pi(prod, n);
}

/* cos00 with j replaced by 2j+1 and n by 2n */
static trigreal cos01(int i, int j, int n)
{
    const int jj = 2*j + 1;

    return cos00(i, jj, 2*n);
}

/* cos00 with i replaced by 2i+1 and n by 2n */
static trigreal cos10(int i, int j, int n)
{
    const int ii = 2*i + 1;

    return cos00(ii, j, 2*n);
}

/* cos00 with i, j replaced by 2i+1, 2j+1 and n by 4n */
static trigreal cos11(int i, int j, int n)
{
    const int ii = 2*i + 1;
    const int jj = 2*j + 1;

    return cos00(ii, jj, 4*n);
}
|
||||
|
||||
/* sin(2*pi*i*j/n) */
static trigreal sin00(int i, int j, int n)
{
    const int prod = i * j;

    return sin2pi(prod, n);
}

/* sin00 with j replaced by 2j+1 and n by 2n */
static trigreal sin01(int i, int j, int n)
{
    const int jj = 2*j + 1;

    return sin00(i, jj, 2*n);
}

/* sin00 with i replaced by 2i+1 and n by 2n */
static trigreal sin10(int i, int j, int n)
{
    const int ii = 2*i + 1;

    return sin00(ii, j, 2*n);
}

/* sin00 with i, j replaced by 2i+1, 2j+1 and n by 4n */
static trigreal sin11(int i, int j, int n)
{
    const int ii = 2*i + 1;
    const int jj = 2*j + 1;

    return sin00(ii, jj, 4*n);
}
|
||||
|
||||
static trigreal realhalf(int i, int j, int n)
|
||||
{
|
||||
UNUSED(i);
|
||||
if (j <= n - j)
|
||||
return 1.0;
|
||||
else
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
static trigreal coshalf(int i, int j, int n)
|
||||
{
|
||||
if (j <= n - j)
|
||||
return cos00(i, j, n);
|
||||
else
|
||||
return cos00(i, n - j, n);
|
||||
}
|
||||
|
||||
static trigreal unity(int i, int j, int n)
|
||||
{
|
||||
UNUSED(i);
|
||||
UNUSED(j);
|
||||
UNUSED(n);
|
||||
return 1.0;
|
||||
}
|
||||
|
||||
typedef trigreal (*trigfun)(int, int, int);
|
||||
|
||||
/* Fill the N elements of real array A with values from mydrand(). */
static void rarand(R *a, int n)
{
    int idx = 0;

    while (idx < n) {
        a[idx] = mydrand();
        ++idx;
    }
}
|
||||
|
||||
/* C = A + B */
|
||||
static void raadd(R *c, R *a, R *b, int n)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < n; ++i) {
|
||||
c[i] = a[i] + b[i];
|
||||
}
|
||||
}
|
||||
|
||||
/* C = A - B */
|
||||
static void rasub(R *c, R *a, R *b, int n)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < n; ++i) {
|
||||
c[i] = a[i] - b[i];
|
||||
}
|
||||
}
|
||||
|
||||
/* B = rotate left A + rotate right A */
|
||||
static void rarolr(R *b, R *a, int n, int nb, int na,
|
||||
r2r_kind_t k)
|
||||
{
|
||||
int isL0 = 0, isL1 = 0, isR0 = 0, isR1 = 0;
|
||||
int i, ib, ia;
|
||||
|
||||
for (ib = 0; ib < nb; ++ib) {
|
||||
for (i = 0; i < n - 1; ++i)
|
||||
for (ia = 0; ia < na; ++ia)
|
||||
b[(ib * n + i) * na + ia] =
|
||||
a[(ib * n + i + 1) * na + ia];
|
||||
|
||||
/* ugly switch to do boundary conditions for various r2r types */
|
||||
switch (k) {
|
||||
/* periodic boundaries */
|
||||
case R2R_DHT:
|
||||
case R2R_R2HC:
|
||||
for (ia = 0; ia < na; ++ia) {
|
||||
b[(ib * n + n - 1) * na + ia] =
|
||||
a[(ib * n + 0) * na + ia];
|
||||
b[(ib * n + 0) * na + ia] +=
|
||||
a[(ib * n + n - 1) * na + ia];
|
||||
}
|
||||
break;
|
||||
|
||||
case R2R_HC2R: /* ugh (hermitian halfcomplex boundaries) */
|
||||
if (n > 2) {
|
||||
if (n % 2 == 0)
|
||||
for (ia = 0; ia < na; ++ia) {
|
||||
b[(ib * n + n - 1) * na + ia] = 0.0;
|
||||
b[(ib * n + 0) * na + ia] +=
|
||||
a[(ib * n + 1) * na + ia];
|
||||
b[(ib * n + n/2) * na + ia] +=
|
||||
+ a[(ib * n + n/2 - 1) * na + ia]
|
||||
- a[(ib * n + n/2 + 1) * na + ia];
|
||||
b[(ib * n + n/2 + 1) * na + ia] +=
|
||||
- a[(ib * n + n/2) * na + ia];
|
||||
}
|
||||
else
|
||||
for (ia = 0; ia < na; ++ia) {
|
||||
b[(ib * n + n - 1) * na + ia] = 0.0;
|
||||
b[(ib * n + 0) * na + ia] +=
|
||||
a[(ib * n + 1) * na + ia];
|
||||
b[(ib * n + n/2) * na + ia] +=
|
||||
+ a[(ib * n + n/2) * na + ia]
|
||||
- a[(ib * n + n/2 + 1) * na + ia];
|
||||
b[(ib * n + n/2 + 1) * na + ia] +=
|
||||
- a[(ib * n + n/2 + 1) * na + ia]
|
||||
- a[(ib * n + n/2) * na + ia];
|
||||
}
|
||||
} else /* n <= 2 */ {
|
||||
for (ia = 0; ia < na; ++ia) {
|
||||
b[(ib * n + n - 1) * na + ia] =
|
||||
a[(ib * n + 0) * na + ia];
|
||||
b[(ib * n + 0) * na + ia] +=
|
||||
a[(ib * n + n - 1) * na + ia];
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/* various even/odd boundary conditions */
|
||||
case R2R_REDFT00:
|
||||
isL1 = isR1 = 1;
|
||||
goto mirrors;
|
||||
case R2R_REDFT01:
|
||||
isL1 = 1;
|
||||
goto mirrors;
|
||||
case R2R_REDFT10:
|
||||
isL0 = isR0 = 1;
|
||||
goto mirrors;
|
||||
case R2R_REDFT11:
|
||||
isL0 = 1;
|
||||
isR0 = -1;
|
||||
goto mirrors;
|
||||
case R2R_RODFT00:
|
||||
goto mirrors;
|
||||
case R2R_RODFT01:
|
||||
isR1 = 1;
|
||||
goto mirrors;
|
||||
case R2R_RODFT10:
|
||||
isL0 = isR0 = -1;
|
||||
goto mirrors;
|
||||
case R2R_RODFT11:
|
||||
isL0 = -1;
|
||||
isR0 = 1;
|
||||
goto mirrors;
|
||||
|
||||
mirrors:
|
||||
|
||||
for (ia = 0; ia < na; ++ia)
|
||||
b[(ib * n + n - 1) * na + ia] =
|
||||
isR0 * a[(ib * n + n - 1) * na + ia]
|
||||
+ (n > 1 ? isR1 * a[(ib * n + n - 2) * na + ia]
|
||||
: 0);
|
||||
|
||||
for (ia = 0; ia < na; ++ia)
|
||||
b[(ib * n) * na + ia] +=
|
||||
isL0 * a[(ib * n) * na + ia]
|
||||
+ (n > 1 ? isL1 * a[(ib * n + 1) * na + ia] : 0);
|
||||
|
||||
}
|
||||
|
||||
for (i = 1; i < n; ++i)
|
||||
for (ia = 0; ia < na; ++ia)
|
||||
b[(ib * n + i) * na + ia] +=
|
||||
a[(ib * n + i - 1) * na + ia];
|
||||
}
|
||||
}
|
||||
|
||||
/* B[jb][j][ja] = A[jb][j][ja] * 2 * t(1, j + k0, n0): scale along the
   middle dimension (length N) of an [nb][n][na] array by twice the trig
   function T. */
static void raphase_shift(R *b, R *a, int n, int nb, int na,
                          int n0, int k0, trigfun t)
{
    int j, jb, ja;

    for (jb = 0; jb < nb; ++jb) {
        for (j = 0; j < n; ++j) {
            /* one factor per position j, shared by the inner run */
            trigreal c = 2.0 * t(1, j + k0, n0);
            R *dst = b + (jb * n + j) * na;
            R *src = a + (jb * n + j) * na;

            for (ja = 0; ja < na; ++ja)
                dst[ja] = src[ja] * c;
        }
    }
}
|
||||
|
||||
/* A = alpha * A (real, in place) */
|
||||
static void rascale(R *a, R alpha, int n)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < n; ++i) {
|
||||
a[i] *= alpha;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* compute rdft:
|
||||
*/
|
||||
|
||||
/* copy real A into real B, using output stride of A and input stride of B */
|
||||
typedef struct {
|
||||
dotens2_closure k;
|
||||
R *ra;
|
||||
R *rb;
|
||||
} cpyr_closure;
|
||||
|
||||
/* Per-element callback for bench_dotens2: copy one real value from A
   (addressed by its output index) to B (addressed by its input index). */
static void cpyr0(dotens2_closure *k_,
                  int indxa, int ondxa, int indxb, int ondxb)
{
    cpyr_closure *self = (cpyr_closure *)k_;
    R *dst = self->rb + indxb;
    R *src = self->ra + ondxa;

    *dst = *src;

    /* only A's output index and B's input index take part in the copy */
    UNUSED(indxa);
    UNUSED(ondxb);
}
|
||||
|
||||
/* Copy real array RA laid out as SZA into real array RB laid out as SZB. */
static void cpyr(R *ra, bench_tensor *sza, R *rb, bench_tensor *szb)
{
    cpyr_closure closure;

    closure.k.apply = cpyr0;
    closure.ra = ra;
    closure.rb = rb;

    bench_dotens2(sza, szb, &closure.k);
}
|
||||
|
||||
/* Run the problem in NFO once: scatter packed array IN into the
   problem's input buffer, execute, and gather the problem's output
   buffer into packed array OUT. */
static void dofft(info *nfo, R *in, R *out)
{
    bench_problem *prob = nfo->p;

    cpyr(in, nfo->pckdsz, (R *) prob->in, nfo->totalsz);
    after_problem_rcopy_from(prob, (bench_real *)prob->in);

    doit(1, prob);

    after_problem_rcopy_to(prob, (bench_real *)prob->out);
    cpyr((R *) prob->out, nfo->totalsz, out, nfo->pckdsz);
}
|
||||
|
||||
/* Compare real arrays A and B of length N.  Returns their relative
   error; when it exceeds TOL, reports the error and the array contents
   (at most 300 rows unless verbose > 2) and calls
   bench_exit(EXIT_FAILURE).  TEST names the check in the report. */
static double racmp(R *a, R *b, int n, const char *test, double tol)
{
    double d = raerror(a, b, n);
    int i, shown;

    if (!(d > tol))
        return d;

    ovtpvt_err("Found relative error %e (%s)\n", d, test);

    if (n > 300 && verbose <= 2)
        shown = 300;
    else
        shown = n;

    for (i = 0; i < shown; ++i)
        ovtpvt_err("%8d %16.12f %16.12f\n", i,
                   (double) a[i],
                   (double) b[i]);

    bench_exit(EXIT_FAILURE);
    return d;
}
|
||||
|
||||
/***********************************************************************/
|
||||
|
||||
typedef struct {
|
||||
int n; /* physical size */
|
||||
int n0; /* "logical" transform size */
|
||||
int i0, k0; /* shifts of input/output */
|
||||
trigfun ti, ts; /* impulse/shift trig functions */
|
||||
} dim_stuff;
|
||||
|
||||
/* Fill A (N elements, RNK dimensions described by D[0..rnk-1]) with the
   expected response to an impulse: the product over all dimensions of
   ti(i0, k0 + index, n0), scaled by IMPULSE_AMP. */
static void impulse_response(int rnk, dim_stuff *d, R impulse_amp,
                             R *A, int N)
{
    int idx, slice;

    if (rnk == 0) {
        A[0] = impulse_amp;
        return;
    }

    /* number of elements in one slice of the leading dimension */
    slice = N / d->n;

    for (idx = 0; idx < d->n; ++idx)
        impulse_response(rnk - 1, d + 1,
                         impulse_amp * d->ti(d->i0, d->k0 + idx, d->n0),
                         A + idx * slice, slice);
}
|
||||
|
||||
/***************************************************************************/
|
||||
|
||||
/*
|
||||
* Implementation of the FFT tester described in
|
||||
*
|
||||
* Funda Ergün. Testing multivariate linear functions: Overcoming the
|
||||
* generator bottleneck. In Proceedings of the Twenty-Seventh Annual
|
||||
* ACM Symposium on the Theory of Computing, pages 407-416, Las Vegas,
|
||||
* Nevada, 29 May--1 June 1995.
|
||||
*
|
||||
* Also: F. Ergun, S. R. Kumar, and D. Sivakumar, "Self-testing without
|
||||
* the generator bottleneck," SIAM J. on Computing 29 (5), 1630-51 (2000).
|
||||
*/
|
||||
|
||||
/* Linearity check for real transforms, repeated ROUNDS times: for random
   inputs A, B and random real scalars alpha, beta, the transform of
   alpha*A + beta*B must equal alpha*T(A) + beta*T(B).  Returns the worst
   relative error. */
static double rlinear(int n, info *nfo, R *inA, R *inB, R *inC, R *outA,
                      R *outB, R *outC, R *tmp, int rounds, double tol)
{
    double worst = 0.0;
    int remaining;

    for (remaining = rounds; remaining > 0; --remaining) {
        R alpha, beta;

        alpha = mydrand();
        beta = mydrand();
        rarand(inA, n);
        rarand(inB, n);
        dofft(nfo, inA, outA);
        dofft(nfo, inB, outB);

        /* tmp = alpha * T(A) + beta * T(B) */
        rascale(outA, alpha, n);
        rascale(outB, beta, n);
        raadd(tmp, outA, outB, n);

        /* outC = T(alpha * A + beta * B) */
        rascale(inA, alpha, n);
        rascale(inB, beta, n);
        raadd(inC, inA, inB, n);
        dofft(nfo, inC, outC);

        worst = dmax(worst, racmp(outC, tmp, n, "linear", tol));
    }

    return worst;
}
|
||||
|
||||
/* Impulse check for real transforms: each of the VECN vectors gets an
   impulse of a different amplitude at its first element and the
   transform is compared with impulse_response(), first directly and
   then ROUNDS times through random linear splits.  Returns the worst
   relative error. */
static double rimpulse(dim_stuff *d, R impulse_amp,
                       int n, int vecn, info *nfo,
                       R *inA, R *inB, R *inC,
                       R *outA, R *outB, R *outC,
                       R *tmp, int rounds, double tol)
{
    const int total = n * vecn;
    double worst = 0.0;
    int idx, vec, remaining;

    /* test 2: check that the unit impulse is transformed properly */

    /* start from an all-zero input */
    for (idx = 0; idx < total; ++idx)
        inA[idx] = 0.0;

    for (vec = 0; vec < vecn; ++vec) {
        R *first = inA + vec * n;

        /* impulse at the start of vector vec */
        *first = (vec+1) / (double)(vecn+1);

        /* its expected transform */
        impulse_response(nfo->probsz->rnk, d, impulse_amp * *first,
                         outA + vec * n, n);
    }

    dofft(nfo, inA, tmp);
    worst = dmax(worst, racmp(tmp, outA, total, "impulse 1", tol));

    for (remaining = rounds; remaining > 0; --remaining) {
        rarand(inB, total);
        rasub(inC, inA, inB, total);
        dofft(nfo, inB, outB);
        dofft(nfo, inC, outC);
        raadd(tmp, outB, outC, total);
        worst = dmax(worst, racmp(tmp, outA, total, "impulse", tol));
    }

    return worst;
}
|
||||
|
||||
static double t_shift(int n, int vecn, info *nfo,
|
||||
R *inA, R *inB, R *outA, R *outB, R *tmp,
|
||||
int rounds, double tol,
|
||||
dim_stuff *d)
|
||||
{
|
||||
double e = 0.0;
|
||||
int nb, na, dim, N = n * vecn;
|
||||
int i, j;
|
||||
bench_tensor *sz = nfo->probsz;
|
||||
|
||||
/* test 3: check the time-shift property */
|
||||
/* the paper performs more tests, but this code should be fine too */
|
||||
|
||||
nb = 1;
|
||||
na = n;
|
||||
|
||||
/* check shifts across all SZ dimensions */
|
||||
for (dim = 0; dim < sz->rnk; ++dim) {
|
||||
int ncur = sz->dims[dim].n;
|
||||
|
||||
na /= ncur;
|
||||
|
||||
for (j = 0; j < rounds; ++j) {
|
||||
rarand(inA, N);
|
||||
|
||||
for (i = 0; i < vecn; ++i) {
|
||||
rarolr(inB + i * n, inA + i*n, ncur, nb,na,
|
||||
nfo->p->k[dim]);
|
||||
}
|
||||
dofft(nfo, inA, outA);
|
||||
dofft(nfo, inB, outB);
|
||||
for (i = 0; i < vecn; ++i)
|
||||
raphase_shift(tmp + i * n, outA + i * n, ncur,
|
||||
nb, na, d[dim].n0, d[dim].k0, d[dim].ts);
|
||||
e = dmax(e, racmp(tmp, outB, N, "time shift", tol));
|
||||
}
|
||||
|
||||
nb *= ncur;
|
||||
}
|
||||
return e;
|
||||
}
|
||||
|
||||
/***********************************************************************/
|
||||
|
||||
/*
 * Verify a real-to-real problem.
 *
 * p       problem to verify; p->k[i] selects the transform kind of
 *         dimension i
 * rounds  number of random rounds per test; 0 selects the default (20)
 * tol     tolerance handed to racmp()
 * e       receives the impulse (e->i), linearity (e->l) and shift
 *         (e->s) results
 *
 * When the problem is out of place and does not destroy its input, the
 * function also checks that the transform leaves the input unchanged.
 */
void verify_r2r(bench_problem *p, int rounds, double tol, errors *e)
{
     R *inA, *inB, *inC, *outA, *outB, *outC, *tmp;
     info nfo;
     int n, vecn, N;
     double impulse_amp = 1.0;
     dim_stuff *d;
     int i;

     if (rounds == 0)
          rounds = 20;  /* default value */

     n = tensor_sz(p->sz);
     vecn = tensor_sz(p->vecsz);
     N = n * vecn;

     /* Per-dimension parameters: physical size n, logical size n0,
        index offsets i0/k0, and the trig functions used by the impulse
        test (ti) and the shift test (ts). */
     d = (dim_stuff *) bench_malloc(sizeof(dim_stuff) * p->sz->rnk);
     for (i = 0; i < p->sz->rnk; ++i) {
          int n0, i0, k0;
          trigfun ti, ts;

          d[i].n = n0 = p->sz->dims[i].n;
          /* kinds after R2R_DHT use a logical size of 2*(n-1), 2*(n+1)
             or 2*n depending on the kind */
          if (p->k[i] > R2R_DHT)
               n0 = 2 * (n0 + (p->k[i] == R2R_REDFT00 ? -1 :
                               (p->k[i] == R2R_RODFT00 ? 1 : 0)));

          switch (p->k[i]) {
              case R2R_R2HC:
                   i0 = k0 = 0;
                   ti = realhalf;
                   ts = coshalf;
                   break;
              case R2R_DHT:
                   i0 = k0 = 0;
                   ti = unity;
                   ts = cos00;
                   break;
              case R2R_HC2R:
                   i0 = k0 = 0;
                   ti = unity;
                   ts = cos00;
                   break;
              case R2R_REDFT00:
                   i0 = k0 = 0;
                   ti = ts = cos00;
                   break;
              case R2R_REDFT01:
                   i0 = k0 = 0;
                   ti = ts = cos01;
                   break;
              case R2R_REDFT10:
                   i0 = k0 = 0;
                   ti = cos10; impulse_amp *= 2.0;
                   ts = cos00;
                   break;
              case R2R_REDFT11:
                   i0 = k0 = 0;
                   ti = cos11; impulse_amp *= 2.0;
                   ts = cos01;
                   break;
              case R2R_RODFT00:
                   i0 = k0 = 1;
                   ti = sin00; impulse_amp *= 2.0;
                   ts = cos00;
                   break;
              case R2R_RODFT01:
                   i0 = 1; k0 = 0;
                   ti = sin01; impulse_amp *= n == 1 ? 1.0 : 2.0;
                   ts = cos01;
                   break;
              case R2R_RODFT10:
                   i0 = 0; k0 = 1;
                   ti = sin10; impulse_amp *= 2.0;
                   ts = cos00;
                   break;
              case R2R_RODFT11:
                   i0 = k0 = 0;
                   ti = sin11; impulse_amp *= 2.0;
                   ts = cos01;
                   break;
              default:
                   BENCH_ASSERT(0);
                   /* unknown kind: release the table before bailing out
                      (nothing else has been allocated yet) */
                   bench_free(d);
                   return;
          }

          d[i].n0 = n0;
          d[i].i0 = i0;
          d[i].k0 = k0;
          d[i].ti = ti;
          d[i].ts = ts;
     }


     inA = (R *) bench_malloc(N * sizeof(R));
     inB = (R *) bench_malloc(N * sizeof(R));
     inC = (R *) bench_malloc(N * sizeof(R));
     outA = (R *) bench_malloc(N * sizeof(R));
     outB = (R *) bench_malloc(N * sizeof(R));
     outC = (R *) bench_malloc(N * sizeof(R));
     tmp = (R *) bench_malloc(N * sizeof(R));

     nfo.p = p;
     nfo.probsz = p->sz;
     nfo.totalsz = tensor_append(p->vecsz, nfo.probsz);
     nfo.pckdsz = verify_pack(nfo.totalsz, 1);
     nfo.pckdvecsz = verify_pack(p->vecsz, tensor_sz(nfo.probsz));

     e->i = rimpulse(d, impulse_amp, n, vecn, &nfo,
                     inA, inB, inC, outA, outB, outC, tmp, rounds, tol);
     e->l = rlinear(N, &nfo, inA, inB, inC, outA, outB, outC, tmp, rounds,tol);
     e->s = t_shift(n, vecn, &nfo, inA, inB, outA, outB, tmp,
                    rounds, tol, d);

     /* grr, verify-lib.c:preserves_input() only works for complex */
     if (!p->in_place && !p->destroy_input) {
          bench_tensor *totalsz_swap, *pckdsz_swap;
          totalsz_swap = tensor_copy_swapio(nfo.totalsz);
          pckdsz_swap = tensor_copy_swapio(nfo.pckdsz);

          for (i = 0; i < rounds; ++i) {
               rarand(inA, N);
               dofft(&nfo, inA, outB);
               /* read the problem's input array back and require an
                  exact match (tolerance 0.0) with what was fed in */
               cpyr((R *) nfo.p->in, totalsz_swap, inB, pckdsz_swap);
               racmp(inB, inA, N, "preserves_input", 0.0);
          }

          tensor_destroy(totalsz_swap);
          tensor_destroy(pckdsz_swap);
     }

     tensor_destroy(nfo.totalsz);
     tensor_destroy(nfo.pckdsz);
     tensor_destroy(nfo.pckdvecsz);
     bench_free(tmp);
     bench_free(outC);
     bench_free(outB);
     bench_free(outA);
     bench_free(inC);
     bench_free(inB);
     bench_free(inA);
     bench_free(d);
}
|
||||
|
||||
|
||||
typedef struct {
|
||||
dofft_closure k;
|
||||
bench_problem *p;
|
||||
int n0;
|
||||
} dofft_r2r_closure;
|
||||
|
||||
/*
 * Strided scaled copy: out[i * os] = in[i * is] * scale for
 * 0 <= i < n.  Strides are in units of R and may be negative.
 */
static void cpyr1(int n, R *in, int is, R *out, int os, R scale)
{
     int idx = 0;

     while (idx < n) {
          out[idx * os] = in[idx * is] * scale;
          ++idx;
     }
}
|
||||
|
||||
/*
 * Make component c (0 = real, 1 = imaginary) of a[0..n-1] even:
 * a[n - j][c] = a[j][c] for every j >= 1 with 2*j < n.
 */
static void mke00(C *a, int n, int c)
{
     int j = 1;

     while (j + j < n) {
          a[n - j][c] = a[j][c];
          ++j;
     }
}
|
||||
|
||||
/* Apply mkreal() to a[0..n-1], then make the real parts even. */
static void mkre00(C *a, int n)
{
     mkreal(a, n);
     mke00(a, n, 0);    /* component 0 is the real part */
}
|
||||
|
||||
/* Make a[0..n-1] purely imaginary by zeroing every real part. */
static void mkimag(C *a, int n)
{
     int j = 0;

     while (j < n) {
          c_re(a[j]) = 0.0;
          ++j;
     }
}
|
||||
|
||||
/*
 * Make component c (0 = real, 1 = imaginary) of a[0..n-1] odd:
 * a[0][c] = 0, a[n - j][c] = -a[j][c] for every j >= 1 with 2*j < n,
 * and, when n is even, the middle element a[n/2][c] = 0.
 */
static void mko00(C *a, int n, int c)
{
     int j;

     a[0][c] = 0.0;

     j = 1;
     while (j + j < n) {
          a[n - j][c] = -a[j][c];
          ++j;
     }

     /* the loop stops at the first j with 2*j >= n; equality means
        a[j] is its own mirror image and therefore must vanish */
     if (j + j == n)
          a[j][c] = 0.0;
}
|
||||
|
||||
/* Apply mkreal() to a[0..n-1], then make the real parts odd. */
static void mkro00(C *a, int n)
{
     mkreal(a, n);
     mko00(a, n, 0);    /* component 0 is the real part */
}
|
||||
|
||||
/* Zero the real parts of a[0..n-1], then make the imaginary parts odd. */
static void mkio00(C *a, int n)
{
     mkimag(a, n);
     mko00(a, n, 1);    /* component 1 is the imaginary part */
}
|
||||
|
||||
/*
 * Constraint used for R2R_REDFT10 outputs and R2R_REDFT01 accuracy
 * inputs.  n should be a multiple of 4.
 *
 * The real parts of the first half are made odd, except that the
 * original a[0] is preserved and a[n/2] is set to its negative; then
 * the whole array is passed through mkre00().
 */
static void mkre01(C *a, int n)
{
     R first = c_re(a[0]);      /* mko00() below overwrites a[0] */

     mko00(a, n/2, 0);
     c_re(a[0]) = first;
     c_re(a[n/2]) = -first;
     mkre00(a, n);
}
|
||||
|
||||
/*
 * Constraint used for R2R_RODFT10 outputs and R2R_RODFT01 accuracy
 * inputs.  n should be a multiple of 4.
 *
 * a[0] is cleared, the first half is passed through mkre00(), then the
 * whole array through mkro00().
 */
static void mkro01(C *a, int n)
{
     c_im(a[0]) = 0.0;
     c_re(a[0]) = 0.0;
     mkre00(a, n/2);
     mkro00(a, n);
}
|
||||
|
||||
/* Zero every even-indexed element of a[0..n-1]; odd indices are kept. */
static void mkoddonly(C *a, int n)
{
     int j = 0;

     while (j < n) {
          c_im(a[j]) = 0.0;
          c_re(a[j]) = 0.0;
          j += 2;
     }
}
|
||||
|
||||
/* Keep odd-indexed elements only, then apply mkre00(). */
static void mkre10(C *a, int n)
{
     mkoddonly(a, n);
     mkre00(a, n);
}
|
||||
|
||||
/* Keep odd-indexed elements only, then apply mkio00(). */
static void mkio10(C *a, int n)
{
     mkoddonly(a, n);
     mkio00(a, n);
}
|
||||
|
||||
/*
 * Keep odd-indexed elements only, make the real parts of the first
 * half odd, then apply mkre00() to the whole array.
 */
static void mkre11(C *a, int n)
{
     mkoddonly(a, n);
     mko00(a, n/2, 0);  /* component 0 is the real part */
     mkre00(a, n);
}
|
||||
|
||||
/*
 * Keep odd-indexed elements only, apply mkre00() to the first half,
 * then mkro00() to the whole array.
 */
static void mkro11(C *a, int n)
{
     mkoddonly(a, n);
     mkre00(a, n/2);
     mkro00(a, n);
}
|
||||
|
||||
/*
 * Keep odd-indexed elements only, make the imaginary parts of the
 * first half even, then apply mkio00() to the whole array.
 */
static void mkio11(C *a, int n)
{
     mkoddonly(a, n);
     mke00(a, n/2, 1);  /* component 1 is the imaginary part */
     mkio00(a, n);
}
|
||||
|
||||
/*
 * dofft_closure callback for rank-1 r2r problems.
 *
 * Step 1 packs the relevant part of the complex array `in` into the
 * problem's real input, step 2 runs the transform once, and step 3
 * unpacks the real output into the complex array `out` and completes
 * it with the symmetry matching the transform kind.  When
 * recopy_input is set, the problem's input is also copied back into
 * `in` after the transform.
 *
 * cpyr1() strides are in units of R: stride 2 steps over consecutive
 * complex elements, stride 4 over every other one.
 */
static void r2r_apply(dofft_closure *k_, bench_complex *in, bench_complex *out)
{
     dofft_r2r_closure *k = (dofft_r2r_closure *)k_;
     bench_problem *p = k->p;
     bench_real *ri = (bench_real *) p->in;
     bench_real *ro = (bench_real *) p->out;
     const int n = p->sz->dims[0].n;
     const int is = p->sz->dims[0].is;
     const int os = p->sz->dims[0].os;
     const int nre = n/2 + 1;           /* real parts in halfcomplex data */
     const int nim = (n+1)/2 - 1;       /* imaginary parts in halfcomplex data */
     int recopy;

     /* step 1: pack `in` into the problem's input array */
     switch (p->k[0]) {
         case R2R_R2HC:
         case R2R_REDFT00:
         case R2R_REDFT01:
              cpyr1(n, &c_re(in[0]), 2, ri, is, 1.0);
              break;
         case R2R_HC2R:
              cpyr1(nre, &c_re(in[0]), 2, ri, is, 1.0);
              cpyr1(nim, &c_im(in[n-1]), -2, ri + is*(n-1), -is, 1.0);
              break;
         case R2R_RODFT00:
         case R2R_RODFT01:
              cpyr1(n, &c_re(in[1]), 2, ri, is, 1.0);
              break;
         case R2R_REDFT10:
         case R2R_REDFT11:
         case R2R_RODFT11:
              cpyr1(n, &c_re(in[1]), 4, ri, is, 1.0);
              break;
         case R2R_RODFT10:
              cpyr1(n, &c_im(in[1]), 4, ri, is, 1.0);
              break;
         default:
              BENCH_ASSERT(0); /* not yet implemented */
     }

     /* step 2: run the transform */
     after_problem_rcopy_from(p, ri);
     doit(1, p);
     after_problem_rcopy_to(p, ro);

     /* step 3: unpack the result (and, on request, the input) */
     recopy = k->k.recopy_input;
     switch (p->k[0]) {
         case R2R_R2HC:
              if (recopy)
                   cpyr1(n, ri, is, &c_re(in[0]), 2, 1.0);
              cpyr1(nre, ro, os, &c_re(out[0]), 2, 1.0);
              cpyr1(nim, ro + os*(n-1), -os, &c_im(out[1]), 2, 1.0);
              c_im(out[0]) = 0.0;
              if (n % 2 == 0)
                   c_im(out[n/2]) = 0.0;
              mkhermitian1(out, n);
              break;
         case R2R_HC2R:
              if (recopy) {
                   cpyr1(nre, ri, is, &c_re(in[0]), 2, 1.0);
                   cpyr1(nim, ri + is*(n-1), -is, &c_im(in[1]), 2,1.0);
              }
              cpyr1(n, ro, os, &c_re(out[0]), 2, 1.0);
              mkreal(out, n);
              break;
         case R2R_REDFT00:
              if (recopy)
                   cpyr1(n, ri, is, &c_re(in[0]), 2, 1.0);
              cpyr1(n, ro, os, &c_re(out[0]), 2, 1.0);
              mkre00(out, k->n0);
              break;
         case R2R_RODFT00:
              if (recopy)
                   cpyr1(n, ri, is, &c_im(in[1]), 2, -1.0);
              cpyr1(n, ro, os, &c_im(out[1]), 2, -1.0);
              mkio00(out, k->n0);
              break;
         case R2R_REDFT01:
              if (recopy)
                   cpyr1(n, ri, is, &c_re(in[0]), 2, 1.0);
              cpyr1(n, ro, os, &c_re(out[1]), 4, 2.0);
              mkre10(out, k->n0);
              break;
         case R2R_REDFT10:
              if (recopy)
                   cpyr1(n, ri, is, &c_re(in[1]), 4, 2.0);
              cpyr1(n, ro, os, &c_re(out[0]), 2, 1.0);
              mkre01(out, k->n0);
              break;
         case R2R_RODFT01:
              if (recopy)
                   cpyr1(n, ri, is, &c_re(in[1]), 2, 1.0);
              cpyr1(n, ro, os, &c_im(out[1]), 4, -2.0);
              mkio10(out, k->n0);
              break;
         case R2R_RODFT10:
              if (recopy)
                   cpyr1(n, ri, is, &c_im(in[1]), 4, -2.0);
              cpyr1(n, ro, os, &c_re(out[1]), 2, 1.0);
              mkro01(out, k->n0);
              break;
         case R2R_REDFT11:
              if (recopy)
                   cpyr1(n, ri, is, &c_re(in[1]), 4, 2.0);
              cpyr1(n, ro, os, &c_re(out[1]), 4, 2.0);
              mkre11(out, k->n0);
              break;
         case R2R_RODFT11:
              if (recopy)
                   cpyr1(n, ri, is, &c_im(in[1]), 4, -2.0);
              cpyr1(n, ro, os, &c_im(out[1]), 4, -2.0);
              mkio11(out, k->n0);
              break;
         default:
              BENCH_ASSERT(0); /* not yet implemented */
     }
}
|
||||
|
||||
/*
 * Accuracy test for a rank-1, non-vector r2r problem.
 *
 * For each supported kind this selects the input constraint and the
 * size n0 of the complex arrays used for the test, then delegates to
 * accuracy_test() with r2r_apply() as the transform and sign -1.  The
 * six results are stored in t.
 */
void accuracy_r2r(bench_problem *p, int rounds, int impulse_rounds,
                  double t[6])
{
     dofft_r2r_closure k;
     aconstrain constrain = 0;
     int n0 = 1;
     int n;
     C *in, *out;

     BENCH_ASSERT(p->kind == PROBLEM_R2R);
     BENCH_ASSERT(p->sz->rnk == 1);
     BENCH_ASSERT(p->vecsz->rnk == 0);

     k.k.apply = r2r_apply;
     k.k.recopy_input = 0;
     k.p = p;
     n = tensor_sz(p->sz);

     switch (p->k[0]) {
         case R2R_R2HC:
              constrain = mkreal;
              n0 = n;
              break;
         case R2R_HC2R:
              constrain = mkhermitian1;
              n0 = n;
              break;
         case R2R_REDFT00:
              constrain = mkre00;
              n0 = 2*(n-1);
              break;
         case R2R_RODFT00:
              constrain = mkro00;
              n0 = 2*(n+1);
              break;
         case R2R_REDFT01:
              constrain = mkre01;
              n0 = 4*n;
              break;
         case R2R_REDFT10:
              constrain = mkre10;
              n0 = 4*n;
              break;
         case R2R_RODFT01:
              constrain = mkro01;
              n0 = 4*n;
              break;
         case R2R_RODFT10:
              constrain = mkio10;
              n0 = 4*n;
              break;
         case R2R_REDFT11:
              constrain = mkre11;
              n0 = 8*n;
              break;
         case R2R_RODFT11:
              constrain = mkro11;
              n0 = 8*n;
              break;
         default:
              BENCH_ASSERT(0); /* not yet implemented */
     }
     k.n0 = n0;

     in = (C *) bench_malloc(n0 * sizeof(C));
     out = (C *) bench_malloc(n0 * sizeof(C));
     accuracy_test(&k.k, constrain, -1, n0, in, out, rounds, impulse_rounds, t);
     bench_free(out);
     bench_free(in);
}
|
||||
307
fftw-3.3.10/libbench2/verify-rdft2.c
Normal file
307
fftw-3.3.10/libbench2/verify-rdft2.c
Normal file
@@ -0,0 +1,307 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include "verify.h"
|
||||
|
||||
/* copy real A into real B, using output stride of A and input stride of B */
|
||||
typedef struct {
|
||||
dotens2_closure k;
|
||||
R *ra;
|
||||
R *rb;
|
||||
} cpyr_closure;
|
||||
|
||||
/*
 * bench_dotens2() callback for cpyr(): copy one element, reading A at
 * its output index and writing B at its input index.
 */
static void cpyr0(dotens2_closure *k_,
                  int indxa, int ondxa, int indxb, int ondxb)
{
     cpyr_closure *ctx = (cpyr_closure *)k_;

     UNUSED(indxa);
     UNUSED(ondxb);
     ctx->rb[indxb] = ctx->ra[ondxa];
}
|
||||
|
||||
/*
 * Copy real array ra (described by sza) into real array rb (described
 * by szb) by running cpyr0() over both tensors with bench_dotens2().
 */
static void cpyr(R *ra, const bench_tensor *sza,
                 R *rb, const bench_tensor *szb)
{
     cpyr_closure ctx;

     ctx.k.apply = cpyr0;
     ctx.ra = ra;
     ctx.rb = rb;
     bench_dotens2(sza, szb, &ctx.k);
}
|
||||
|
||||
/* copy unpacked halfcomplex A[n] into packed-complex B[n], using output stride
|
||||
of A and input stride of B. Only copies non-redundant half; other
|
||||
half must be copied via mkhermitian. */
|
||||
typedef struct {
|
||||
dotens2_closure k;
|
||||
int n;
|
||||
int as;
|
||||
int scalea;
|
||||
R *ra, *ia;
|
||||
R *rb, *ib;
|
||||
} cpyhc2_closure;
|
||||
|
||||
/*
 * bench_dotens2() callback for cpyhc2(): copy the n/2 + 1
 * non-redundant complex elements of one row from A (split into ra/ia,
 * stride as * scalea) into B (rb/ib, stride 2).
 */
static void cpyhc20(dotens2_closure *k_,
                    int indxa, int ondxa, int indxb, int ondxb)
{
     cpyhc2_closure *ctx = (cpyhc2_closure *)k_;
     const int scalea = ctx->scalea;
     const int astride = ctx->as * scalea;
     const int count = ctx->n / 2 + 1;
     R *ra = ctx->ra + ondxa * scalea;
     R *ia = ctx->ia + ondxa * scalea;
     R *rb = ctx->rb + indxb;
     R *ib = ctx->ib + indxb;
     int j;

     UNUSED(indxa);
     UNUSED(ondxb);

     for (j = 0; j < count; ++j) {
          rb[2*j] = ra[astride*j];
          ib[2*j] = ia[astride*j];
     }
}
|
||||
|
||||
/*
 * Copy unpacked halfcomplex A into packed-complex B, using the output
 * stride of A.  sza describes the last (halved) dimension and must
 * have rank <= 1; vecsza and szb drive the outer loop through
 * bench_dotens2().  Only the non-redundant half is copied.
 */
static void cpyhc2(R *ra, R *ia,
                   const bench_tensor *sza, const bench_tensor *vecsza,
                   int scalea,
                   R *rb, R *ib, const bench_tensor *szb)
{
     cpyhc2_closure ctx;

     BENCH_ASSERT(sza->rnk <= 1);

     ctx.k.apply = cpyhc20;
     ctx.n = tensor_sz(sza);
     ctx.scalea = scalea;

     /* a rank-1 tensor supplies its output stride; any other rank has
        no last dimension to step along */
     if (BENCH_FINITE_RNK(sza->rnk) && sza->rnk != 0)
          ctx.as = sza->dims[0].os;
     else
          ctx.as = 0;

     ctx.ra = ra;
     ctx.ia = ia;
     ctx.rb = rb;
     ctx.ib = ib;
     bench_dotens2(vecsza, szb, &ctx.k);
}
|
||||
|
||||
/* icpyhc2 is the inverse of cpyhc2 */
|
||||
|
||||
/*
 * bench_dotens2() callback for icpyhc2(): copy the n/2 + 1
 * non-redundant complex elements of one row from B (rb/ib, stride 2)
 * into A (split into ra/ia, stride as * scalea).
 */
static void icpyhc20(dotens2_closure *k_,
                     int indxa, int ondxa, int indxb, int ondxb)
{
     cpyhc2_closure *ctx = (cpyhc2_closure *)k_;
     const int scalea = ctx->scalea;
     const int astride = ctx->as * scalea;
     const int count = ctx->n / 2 + 1;
     R *ra = ctx->ra + indxa * scalea;
     R *ia = ctx->ia + indxa * scalea;
     R *rb = ctx->rb + ondxb;
     R *ib = ctx->ib + ondxb;
     int j;

     UNUSED(ondxa);
     UNUSED(indxb);

     for (j = 0; j < count; ++j) {
          ra[astride*j] = rb[2*j];
          ia[astride*j] = ib[2*j];
     }
}
|
||||
|
||||
/*
 * Inverse of cpyhc2(): copy packed-complex B into unpacked halfcomplex
 * A, using the input stride of A.  sza must have rank <= 1; vecsza and
 * szb drive the outer loop through bench_dotens2().
 */
static void icpyhc2(R *ra, R *ia,
                    const bench_tensor *sza, const bench_tensor *vecsza,
                    int scalea,
                    R *rb, R *ib, const bench_tensor *szb)
{
     cpyhc2_closure ctx;

     BENCH_ASSERT(sza->rnk <= 1);

     ctx.k.apply = icpyhc20;
     ctx.n = tensor_sz(sza);
     ctx.scalea = scalea;

     /* a rank-1 tensor supplies its input stride; any other rank has
        no last dimension to step along */
     if (BENCH_FINITE_RNK(sza->rnk) && sza->rnk != 0)
          ctx.as = sza->dims[0].is;
     else
          ctx.as = 0;

     ctx.ra = ra;
     ctx.ia = ia;
     ctx.rb = rb;
     ctx.ib = ib;
     bench_dotens2(vecsza, szb, &ctx.k);
}
|
||||
|
||||
typedef struct {
|
||||
dofft_closure k;
|
||||
bench_problem *p;
|
||||
} dofft_rdft2_closure;
|
||||
|
||||
/*
 * dofft_closure callback for real <-> halfcomplex problems.
 *
 * p->sign < 0 (R2HC): the real parts of `in` become the problem's real
 * input, the transform runs once, and the halfcomplex result is
 * unpacked into `out` and completed with mkhermitian().
 * Otherwise (HC2R): the non-redundant half of `in` becomes the
 * problem's halfcomplex input, the transform runs once, and the real
 * result is stored in the real parts of `out`.
 *
 * When recopy_input is set, the problem's input is copied back into
 * `in` after the transform.
 */
static void rdft2_apply(dofft_closure *k_,
                        bench_complex *in, bench_complex *out)
{
     dofft_rdft2_closure *k = (dofft_rdft2_closure *)k_;
     bench_problem *p = k->p;
     bench_tensor *totalsz, *pckdsz, *totalsz_swap, *pckdsz_swap;
     bench_tensor *probsz2, *totalsz2, *pckdsz2;
     bench_tensor *probsz2_swap, *totalsz2_swap, *pckdsz2_swap;
     bench_real *ri, *ii, *ro, *io;
     int n2, totalscale, imag_offset;
     const int has_last_dim = BENCH_FINITE_RNK(p->sz->rnk) && p->sz->rnk > 0;

     totalsz = tensor_append(p->vecsz, p->sz);
     pckdsz = verify_pack(totalsz, 2);

     /* n2: number of complex elements once the last dimension has been
        cut down to n/2 + 1 */
     n2 = tensor_sz(totalsz);
     if (has_last_dim) {
          const int last = p->sz->dims[p->sz->rnk - 1].n;
          n2 = (n2 / last) * (last / 2 + 1);
     }

     ri = (bench_real *) p->in;
     ro = (bench_real *) p->out;

     /* split the last dimension (handled by the halfcomplex copies)
        from the remaining ones (handled by bench_dotens2) */
     if (has_last_dim && n2 > 0) {
          probsz2 = tensor_copy_sub(p->sz, p->sz->rnk - 1, 1);
          totalsz2 = tensor_copy_sub(totalsz, 0, totalsz->rnk - 1);
          pckdsz2 = tensor_copy_sub(pckdsz, 0, pckdsz->rnk - 1);
     } else {
          probsz2 = mktensor(0);
          totalsz2 = tensor_copy(totalsz);
          pckdsz2 = tensor_copy(pckdsz);
     }

     totalsz_swap = tensor_copy_swapio(totalsz);
     pckdsz_swap = tensor_copy_swapio(pckdsz);
     totalsz2_swap = tensor_copy_swapio(totalsz2);
     pckdsz2_swap = tensor_copy_swapio(pckdsz2);
     probsz2_swap = tensor_copy_swapio(probsz2);

     /* confusion: the stride is the distance between complex elements
        when using interleaved format, but it is the distance between
        real elements when using split format */
     if (p->split) {
          imag_offset = n2;     /* imaginary parts follow all real parts */
          totalscale = 1;
     } else {
          imag_offset = 1;      /* imaginary part sits next to its real part */
          totalscale = 2;
     }

     /* explicitly supplied imaginary arrays take precedence */
     if (p->ini)
          ii = (bench_real *) p->ini;
     else
          ii = ri + imag_offset;
     if (p->outi)
          io = (bench_real *) p->outi;
     else
          io = ro + imag_offset;

     if (p->sign < 0) { /* R2HC */
          int N, vN, v;

          cpyr(&c_re(in[0]), pckdsz, ri, totalsz);
          after_problem_rcopy_from(p, ri);
          doit(1, p);
          after_problem_hccopy_to(p, ro, io);
          if (k->k.recopy_input)
               cpyr(ri, totalsz_swap, &c_re(in[0]), pckdsz_swap);
          cpyhc2(ro, io, probsz2, totalsz2, totalscale,
                 &c_re(out[0]), &c_im(out[0]), pckdsz2);

          /* fill in the redundant half of every transform in the vector */
          N = tensor_sz(p->sz);
          vN = tensor_sz(p->vecsz);
          for (v = 0; v < vN; ++v)
               mkhermitian(out + v*N, p->sz->rnk, p->sz->dims, 1);
     } else { /* HC2R */
          icpyhc2(ri, ii, probsz2, totalsz2, totalscale,
                  &c_re(in[0]), &c_im(in[0]), pckdsz2);
          after_problem_hccopy_from(p, ri, ii);
          doit(1, p);
          after_problem_rcopy_to(p, ro);
          if (k->k.recopy_input)
               cpyhc2(ri, ii, probsz2_swap, totalsz2_swap, totalscale,
                      &c_re(in[0]), &c_im(in[0]), pckdsz2_swap);
          mkreal(out, tensor_sz(pckdsz));
          cpyr(ro, totalsz, &c_re(out[0]), pckdsz);
     }

     tensor_destroy(totalsz);
     tensor_destroy(pckdsz);
     tensor_destroy(totalsz_swap);
     tensor_destroy(pckdsz_swap);
     tensor_destroy(probsz2);
     tensor_destroy(totalsz2);
     tensor_destroy(pckdsz2);
     tensor_destroy(probsz2_swap);
     tensor_destroy(totalsz2_swap);
     tensor_destroy(pckdsz2_swap);
}
|
||||
|
||||
/*
 * Verify a real <-> halfcomplex (PROBLEM_REAL) problem.
 *
 * p       problem to verify
 * rounds  number of random rounds per test; 0 selects the default (20)
 * tol     tolerance handed to the individual tests
 * e       receives the impulse (e->i), linearity (e->l) and shift
 *         (e->s) results
 *
 * The shift test is a time shift for p->sign < 0 and a frequency shift
 * otherwise.  Problems whose size or vector rank is not finite are not
 * tested; *e is then set to all zeros so the caller never reads
 * indeterminate values.
 */
void verify_rdft2(bench_problem *p, int rounds, double tol, errors *e)
{
     C *inA, *inB, *inC, *outA, *outB, *outC, *tmp;
     int n, vecn, N;
     dofft_rdft2_closure k;

     BENCH_ASSERT(p->kind == PROBLEM_REAL);

     if (!BENCH_FINITE_RNK(p->sz->rnk) || !BENCH_FINITE_RNK(p->vecsz->rnk)) {
          /* give up, but leave well-defined results behind */
          e->i = e->l = e->s = 0.0;
          return;
     }

     k.k.apply = rdft2_apply;
     k.k.recopy_input = 0;
     k.p = p;

     if (rounds == 0)
          rounds = 20;  /* default value */

     n = tensor_sz(p->sz);
     vecn = tensor_sz(p->vecsz);
     N = n * vecn;

     inA = (C *) bench_malloc(N * sizeof(C));
     inB = (C *) bench_malloc(N * sizeof(C));
     inC = (C *) bench_malloc(N * sizeof(C));
     outA = (C *) bench_malloc(N * sizeof(C));
     outB = (C *) bench_malloc(N * sizeof(C));
     outC = (C *) bench_malloc(N * sizeof(C));
     tmp = (C *) bench_malloc(N * sizeof(C));

     e->i = impulse(&k.k, n, vecn, inA, inB, inC, outA, outB, outC,
                    tmp, rounds, tol);
     e->l = linear(&k.k, 1, N, inA, inB, inC, outA, outB, outC,
                   tmp, rounds, tol);

     e->s = 0.0;
     if (p->sign < 0)
          e->s = dmax(e->s, tf_shift(&k.k, 1, p->sz, n, vecn, p->sign,
                                     inA, inB, outA, outB,
                                     tmp, rounds, tol, TIME_SHIFT));
     else
          e->s = dmax(e->s, tf_shift(&k.k, 1, p->sz, n, vecn, p->sign,
                                     inA, inB, outA, outB,
                                     tmp, rounds, tol, FREQ_SHIFT));

     /* real input for the forward direction, hermitian input otherwise */
     if (!p->in_place && !p->destroy_input)
          preserves_input(&k.k, p->sign < 0 ? mkreal : mkhermitian1,
                          N, inA, inB, outB, rounds);

     bench_free(tmp);
     bench_free(outC);
     bench_free(outB);
     bench_free(outA);
     bench_free(inC);
     bench_free(inB);
     bench_free(inA);
}
|
||||
|
||||
/*
 * Accuracy test for a rank-1, non-vector PROBLEM_REAL problem.
 * Delegates to accuracy_test() with rdft2_apply() as the transform;
 * the input constraint is mkreal for p->sign < 0 and mkhermitian1
 * otherwise.  The six results are stored in t.
 */
void accuracy_rdft2(bench_problem *p, int rounds, int impulse_rounds,
                    double t[6])
{
     dofft_rdft2_closure k;
     C *in, *out;
     int n;

     BENCH_ASSERT(p->kind == PROBLEM_REAL);
     BENCH_ASSERT(p->sz->rnk == 1);
     BENCH_ASSERT(p->vecsz->rnk == 0);

     k.k.apply = rdft2_apply;
     k.k.recopy_input = 0;
     k.p = p;
     n = tensor_sz(p->sz);

     in = (C *) bench_malloc(n * sizeof(C));
     out = (C *) bench_malloc(n * sizeof(C));
     accuracy_test(&k.k, p->sign < 0 ? mkreal : mkhermitian1, p->sign,
                   n, in, out, rounds, impulse_rounds, t);
     bench_free(out);
     bench_free(in);
}
|
||||
96
fftw-3.3.10/libbench2/verify.c
Normal file
96
fftw-3.3.10/libbench2/verify.c
Normal file
@@ -0,0 +1,96 @@
|
||||
/*
|
||||
* Copyright (c) 2000 Matteo Frigo
|
||||
* Copyright (c) 2000 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "verify.h"
|
||||
|
||||
/*
 * Run the verifier matching p->kind and, when `verbose` is set, print
 * the problem string followed by the linearity, impulse and shift
 * results.
 *
 * p       problem to verify; a NULL p->pstring is printed as
 *         "<unknown problem>"
 * rounds  number of rounds, passed through to the verifier
 * tol     tolerance, passed through to the verifier
 */
void verify_problem(bench_problem *p, int rounds, double tol)
{
     errors e;
     const char *pstring = p->pstring ? p->pstring : "<unknown problem>";

     /* A verifier may return without filling in its results (it can
        give up on a problem, and an unknown kind runs no verifier at
        all), so start from defined values instead of printing
        indeterminate ones. */
     e.l = e.i = e.s = 0.0;

     switch (p->kind) {
         case PROBLEM_COMPLEX: verify_dft(p, rounds, tol, &e); break;
         case PROBLEM_REAL: verify_rdft2(p, rounds, tol, &e); break;
         case PROBLEM_R2R: verify_r2r(p, rounds, tol, &e); break;
     }

     if (verbose)
          ovtpvt("%s %g %g %g\n", pstring, e.l, e.i, e.s);
}
|
||||
|
||||
void verify(const char *param, int rounds, double tol)
|
||||
{
|
||||
bench_problem *p;
|
||||
|
||||
p = problem_parse(param);
|
||||
problem_alloc(p);
|
||||
|
||||
if (!can_do(p)) {
|
||||
ovtpvt_err("No can_do for %s\n", p->pstring);
|
||||
BENCH_ASSERT(0);
|
||||
}
|
||||
|
||||
problem_zero(p);
|
||||
setup(p);
|
||||
|
||||
verify_problem(p, rounds, tol);
|
||||
|
||||
done(p);
|
||||
problem_destroy(p);
|
||||
}
|
||||
|
||||
|
||||
/*
 * Run the accuracy test matching p->kind and print its six results on
 * one line.
 */
static void do_accuracy(bench_problem *p, int rounds, int impulse_rounds)
{
     double t[6];

     switch (p->kind) {
         case PROBLEM_COMPLEX:
              accuracy_dft(p, rounds, impulse_rounds, t);
              break;
         case PROBLEM_REAL:
              accuracy_rdft2(p, rounds, impulse_rounds, t);
              break;
         case PROBLEM_R2R:
              accuracy_r2r(p, rounds, impulse_rounds, t);
              break;
     }

     /*
      * t[0]   : L1 error
      * t[1]   : L2 error
      * t[2]   : Linf error
      * t[3..5]: L1, L2, Linf backward error
      */
     ovtpvt("%6.2e %6.2e %6.2e %6.2e %6.2e %6.2e\n",
            t[0], t[1], t[2], t[3], t[4], t[5]);
}
|
||||
|
||||
void accuracy(const char *param, int rounds, int impulse_rounds)
|
||||
{
|
||||
bench_problem *p;
|
||||
p = problem_parse(param);
|
||||
BENCH_ASSERT(can_do(p));
|
||||
problem_alloc(p);
|
||||
problem_zero(p);
|
||||
setup(p);
|
||||
do_accuracy(p, rounds, impulse_rounds);
|
||||
done(p);
|
||||
problem_destroy(p);
|
||||
}
|
||||
105
fftw-3.3.10/libbench2/verify.h
Normal file
105
fftw-3.3.10/libbench2/verify.h
Normal file
@@ -0,0 +1,105 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef LIBBENCH2_VERIFY_H
#define LIBBENCH2_VERIFY_H

/* Guarded against multiple inclusion: the typedefs below must not be
   seen twice by pre-C11 compilers. */

#include "libbench2/bench.h"

/* short names used throughout the verification code */
typedef bench_real R;
typedef bench_complex C;

/*
 * Transform-under-test abstraction.  `apply` runs the transform from
 * the complex array `in` to the complex array `out`.  The apply
 * routines in verify-r2r.c and verify-rdft2.c copy the problem's input
 * back into `in` after the transform when recopy_input is nonzero.
 */
typedef struct dofft_closure_s {
     void (*apply)(struct dofft_closure_s *k,
                   bench_complex *in, bench_complex *out);
     int recopy_input;
} dofft_closure;

double dmax(double x, double y);

/* callback that forces an array of n complex numbers into the shape
   required as input of a transform (e.g. mkreal, mkhermitian1) */
typedef void (*aconstrain)(C *a, int n);

/* helpers operating on arrays of complex numbers */
void arand(C *a, int n);
void mkreal(C *A, int n);
void mkhermitian(C *A, int rank, const bench_iodim *dim, int stride);
void mkhermitian1(C *a, int n);
void aadd(C *c, C *a, C *b, int n);
void asub(C *c, C *a, C *b, int n);
void arol(C *b, C *a, int n, int nb, int na);
void aphase_shift(C *b, C *a, int n, int nb, int na, double sign);
void ascale(C *a, C alpha, int n);
double acmp(C *a, C *b, int n, const char *test, double tol);
double mydrand(void);

/* generic tests; each returns an error value that callers store in the
   `errors` structure */
double impulse(dofft_closure *k,
               int n, int vecn,
               C *inA, C *inB, C *inC,
               C *outA, C *outB, C *outC,
               C *tmp, int rounds, double tol);
double linear(dofft_closure *k, int realp,
              int n, C *inA, C *inB, C *inC, C *outA,
              C *outB, C *outC, C *tmp, int rounds, double tol);
void preserves_input(dofft_closure *k, aconstrain constrain,
                     int n, C *inA, C *inB, C *outB, int rounds);

/* values for the which_shift argument of tf_shift() */
enum { TIME_SHIFT, FREQ_SHIFT };
double tf_shift(dofft_closure *k, int realp, const bench_tensor *sz,
                int n, int vecn, double sign,
                C *inA, C *inB, C *outA, C *outB, C *tmp,
                int rounds, double tol, int which_shift);

/*
 * Callback used by bench_dotens2(): `apply` receives an input index
 * and an output index for each of the two tensors being traversed.
 */
typedef struct dotens2_closure_s {
     void (*apply)(struct dotens2_closure_s *k,
                   int indx0, int ondx0, int indx1, int ondx1);
} dotens2_closure;

void bench_dotens2(const bench_tensor *sz0,
                   const bench_tensor *sz1, dotens2_closure *k);

/* accuracy tests; each stores six results in t */
void accuracy_test(dofft_closure *k, aconstrain constrain,
                   int sign, int n, C *a, C *b, int rounds, int impulse_rounds,
                   double t[6]);

void accuracy_dft(bench_problem *p, int rounds, int impulse_rounds,
                  double t[6]);
void accuracy_rdft2(bench_problem *p, int rounds, int impulse_rounds,
                    double t[6]);
void accuracy_r2r(bench_problem *p, int rounds, int impulse_rounds,
                  double t[6]);

/*
 * trigreal: floating-point type used for trigonometric computations,
 * with the matching cos/sin/tan functions and literal suffix.  long
 * double and __float128 are used when configured and available; plain
 * double otherwise.
 */
#if defined(BENCHFFT_LDOUBLE) && HAVE_COSL
typedef long double trigreal;
#  define COS cosl
#  define SIN sinl
#  define TAN tanl
#  define KTRIG(x) (x##L)
#elif defined(BENCHFFT_QUAD) && HAVE_LIBQUADMATH
typedef __float128 trigreal;
#  define COS cosq
#  define SIN sinq
#  define TAN tanq
#  define KTRIG(x) (x##Q)
extern trigreal cosq(trigreal);
extern trigreal sinq(trigreal);
extern trigreal tanq(trigreal);
#else
typedef double trigreal;
#  define COS cos
#  define SIN sin
#  define TAN tan
#  define KTRIG(x) (x)
#endif

/* 2*pi, written with the literal suffix selected above */
#define K2PI KTRIG(6.2831853071795864769252867665590057683943388)

#endif /* LIBBENCH2_VERIFY_H */
|
||||
43
fftw-3.3.10/libbench2/zero.c
Normal file
43
fftw-3.3.10/libbench2/zero.c
Normal file
@@ -0,0 +1,43 @@
|
||||
/*
|
||||
* Copyright (c) 2001 Matteo Frigo
|
||||
* Copyright (c) 2001 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include "libbench2/bench.h"
|
||||
|
||||
/* set I/O arrays to zero. Default routine */
|
||||
/*
 * Fill the physical input and output arrays of p with zeros.  Which
 * array holds real and which holds complex data follows from the
 * problem kind and, for PROBLEM_REAL, from the sign of the transform.
 * Any other combination fails an assertion.
 */
void problem_zero(bench_problem *p)
{
     bench_complex czero = {0, 0};

     switch (p->kind) {
         case PROBLEM_COMPLEX:
              caset((bench_complex *) p->inphys, p->iphyssz, czero);
              caset((bench_complex *) p->outphys, p->ophyssz, czero);
              break;
         case PROBLEM_R2R:
              aset((bench_real *) p->inphys, p->iphyssz, 0.0);
              aset((bench_real *) p->outphys, p->ophyssz, 0.0);
              break;
         case PROBLEM_REAL:
              if (p->sign < 0) {
                   /* real input, complex output */
                   aset((bench_real *) p->inphys, p->iphyssz, 0.0);
                   caset((bench_complex *) p->outphys, p->ophyssz, czero);
              } else if (p->sign > 0) {
                   /* complex input, real output */
                   caset((bench_complex *) p->inphys, p->iphyssz, czero);
                   aset((bench_real *) p->outphys, p->ophyssz, 0.0);
              } else {
                   BENCH_ASSERT(0); /* TODO */
              }
              break;
         default:
              BENCH_ASSERT(0); /* TODO */
     }
}
|
||||
Reference in New Issue
Block a user