This commit is contained in:
2025-07-12 12:17:44 +03:00
parent c759f60ff7
commit 792e1b937a
3507 changed files with 492613 additions and 0 deletions

View File

@@ -0,0 +1,10 @@
# Add the top of the source tree to the include path so that sources in
# this directory can include headers by their tree-relative name
# (e.g. "kernel/ifftw.h").
AM_CPPFLAGS = -I $(top_srcdir)
# Libtool library built from this directory.  The noinst_ prefix asks
# automake to build it without installing it.
noinst_LTLIBRARIES = libkernel.la
# Sources compiled into libkernel.la; the two headers at the end are
# listed so that they are tracked (and distributed) with the library.
libkernel_la_SOURCES = align.c alloc.c assert.c awake.c buffered.c \
cpy1d.c cpy2d-pair.c cpy2d.c ct.c debug.c extract-reim.c hash.c iabs.c \
kalloc.c md5-1.c md5.c minmax.c ops.c pickdim.c plan.c planner.c \
primes.c print.c problem.c rader.c scan.c solver.c solvtab.c stride.c \
tensor.c tensor1.c tensor2.c tensor3.c tensor4.c tensor5.c tensor7.c \
tensor8.c tensor9.c tile2d.c timer.c transpose.c trig.c twiddle.c \
cycle.h ifftw.h

View File

@@ -0,0 +1,798 @@
# Makefile.in generated by automake 1.16.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994-2020 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE.
@SET_MAKE@
VPATH = @srcdir@
am__is_gnu_make = { \
if test -z '$(MAKELEVEL)'; then \
false; \
elif test -n '$(MAKE_HOST)'; then \
true; \
elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
true; \
else \
false; \
fi; \
}
am__make_running_with_option = \
case $${target_option-} in \
?) ;; \
*) echo "am__make_running_with_option: internal error: invalid" \
"target option '$${target_option-}' specified" >&2; \
exit 1;; \
esac; \
has_opt=no; \
sane_makeflags=$$MAKEFLAGS; \
if $(am__is_gnu_make); then \
sane_makeflags=$$MFLAGS; \
else \
case $$MAKEFLAGS in \
*\\[\ \ ]*) \
bs=\\; \
sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
| sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \
esac; \
fi; \
skip_next=no; \
strip_trailopt () \
{ \
flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
}; \
for flg in $$sane_makeflags; do \
test $$skip_next = yes && { skip_next=no; continue; }; \
case $$flg in \
*=*|--*) continue;; \
-*I) strip_trailopt 'I'; skip_next=yes;; \
-*I?*) strip_trailopt 'I';; \
-*O) strip_trailopt 'O'; skip_next=yes;; \
-*O?*) strip_trailopt 'O';; \
-*l) strip_trailopt 'l'; skip_next=yes;; \
-*l?*) strip_trailopt 'l';; \
-[dEDm]) skip_next=yes;; \
-[JT]) skip_next=yes;; \
esac; \
case $$flg in \
*$$target_option*) has_opt=yes; break;; \
esac; \
done; \
test $$has_opt = yes
am__make_dryrun = (target_option=n; $(am__make_running_with_option))
am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
pkgdatadir = $(datadir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
pkglibexecdir = $(libexecdir)/@PACKAGE@
am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
install_sh_DATA = $(install_sh) -c -m 644
install_sh_PROGRAM = $(install_sh) -c
install_sh_SCRIPT = $(install_sh) -c
INSTALL_HEADER = $(INSTALL_DATA)
transform = $(program_transform_name)
NORMAL_INSTALL = :
PRE_INSTALL = :
POST_INSTALL = :
NORMAL_UNINSTALL = :
PRE_UNINSTALL = :
POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
subdir = kernel
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/m4/acx_mpi.m4 \
$(top_srcdir)/m4/acx_pthread.m4 \
$(top_srcdir)/m4/ax_cc_maxopt.m4 \
$(top_srcdir)/m4/ax_check_compiler_flags.m4 \
$(top_srcdir)/m4/ax_compiler_vendor.m4 \
$(top_srcdir)/m4/ax_gcc_aligns_stack.m4 \
$(top_srcdir)/m4/ax_gcc_version.m4 \
$(top_srcdir)/m4/ax_openmp.m4 $(top_srcdir)/m4/libtool.m4 \
$(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \
$(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
$(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON)
mkinstalldirs = $(install_sh) -d
CONFIG_HEADER = $(top_builddir)/config.h
CONFIG_CLEAN_FILES =
CONFIG_CLEAN_VPATH_FILES =
LTLIBRARIES = $(noinst_LTLIBRARIES)
libkernel_la_LIBADD =
am_libkernel_la_OBJECTS = align.lo alloc.lo assert.lo awake.lo \
buffered.lo cpy1d.lo cpy2d-pair.lo cpy2d.lo ct.lo debug.lo \
extract-reim.lo hash.lo iabs.lo kalloc.lo md5-1.lo md5.lo \
minmax.lo ops.lo pickdim.lo plan.lo planner.lo primes.lo \
print.lo problem.lo rader.lo scan.lo solver.lo solvtab.lo \
stride.lo tensor.lo tensor1.lo tensor2.lo tensor3.lo \
tensor4.lo tensor5.lo tensor7.lo tensor8.lo tensor9.lo \
tile2d.lo timer.lo transpose.lo trig.lo twiddle.lo
libkernel_la_OBJECTS = $(am_libkernel_la_OBJECTS)
AM_V_lt = $(am__v_lt_@AM_V@)
am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
am__v_lt_0 = --silent
am__v_lt_1 =
AM_V_P = $(am__v_P_@AM_V@)
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
am__v_P_0 = false
am__v_P_1 = :
AM_V_GEN = $(am__v_GEN_@AM_V@)
am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
am__v_GEN_0 = @echo " GEN " $@;
am__v_GEN_1 =
AM_V_at = $(am__v_at_@AM_V@)
am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
am__v_at_0 = @
am__v_at_1 =
DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
depcomp = $(SHELL) $(top_srcdir)/depcomp
am__maybe_remake_depfiles = depfiles
am__depfiles_remade = ./$(DEPDIR)/align.Plo ./$(DEPDIR)/alloc.Plo \
./$(DEPDIR)/assert.Plo ./$(DEPDIR)/awake.Plo \
./$(DEPDIR)/buffered.Plo ./$(DEPDIR)/cpy1d.Plo \
./$(DEPDIR)/cpy2d-pair.Plo ./$(DEPDIR)/cpy2d.Plo \
./$(DEPDIR)/ct.Plo ./$(DEPDIR)/debug.Plo \
./$(DEPDIR)/extract-reim.Plo ./$(DEPDIR)/hash.Plo \
./$(DEPDIR)/iabs.Plo ./$(DEPDIR)/kalloc.Plo \
./$(DEPDIR)/md5-1.Plo ./$(DEPDIR)/md5.Plo \
./$(DEPDIR)/minmax.Plo ./$(DEPDIR)/ops.Plo \
./$(DEPDIR)/pickdim.Plo ./$(DEPDIR)/plan.Plo \
./$(DEPDIR)/planner.Plo ./$(DEPDIR)/primes.Plo \
./$(DEPDIR)/print.Plo ./$(DEPDIR)/problem.Plo \
./$(DEPDIR)/rader.Plo ./$(DEPDIR)/scan.Plo \
./$(DEPDIR)/solver.Plo ./$(DEPDIR)/solvtab.Plo \
./$(DEPDIR)/stride.Plo ./$(DEPDIR)/tensor.Plo \
./$(DEPDIR)/tensor1.Plo ./$(DEPDIR)/tensor2.Plo \
./$(DEPDIR)/tensor3.Plo ./$(DEPDIR)/tensor4.Plo \
./$(DEPDIR)/tensor5.Plo ./$(DEPDIR)/tensor7.Plo \
./$(DEPDIR)/tensor8.Plo ./$(DEPDIR)/tensor9.Plo \
./$(DEPDIR)/tile2d.Plo ./$(DEPDIR)/timer.Plo \
./$(DEPDIR)/transpose.Plo ./$(DEPDIR)/trig.Plo \
./$(DEPDIR)/twiddle.Plo
am__mv = mv -f
COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \
$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
$(AM_CFLAGS) $(CFLAGS)
AM_V_CC = $(am__v_CC_@AM_V@)
am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
am__v_CC_0 = @echo " CC " $@;
am__v_CC_1 =
CCLD = $(CC)
LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
$(AM_LDFLAGS) $(LDFLAGS) -o $@
AM_V_CCLD = $(am__v_CCLD_@AM_V@)
am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
am__v_CCLD_0 = @echo " CCLD " $@;
am__v_CCLD_1 =
SOURCES = $(libkernel_la_SOURCES)
DIST_SOURCES = $(libkernel_la_SOURCES)
am__can_run_installinfo = \
case $$AM_UPDATE_INFO_DIR in \
n|no|NO) false;; \
*) (install-info --version) >/dev/null 2>&1;; \
esac
am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
# Read a list of newline-separated strings from the standard input,
# and print each of them once, without duplicates. Input order is
# *not* preserved.
am__uniquify_input = $(AWK) '\
BEGIN { nonempty = 0; } \
{ items[$$0] = 1; nonempty = 1; } \
END { if (nonempty) { for (i in items) print i; }; } \
'
# Make sure the list of sources is unique. This is necessary because,
# e.g., the same source file might be shared among _SOURCES variables
# for different programs/libraries.
am__define_uniq_tagged_files = \
list='$(am__tagged_files)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | $(am__uniquify_input)`
ETAGS = etags
CTAGS = ctags
am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/depcomp
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
ACLOCAL = @ACLOCAL@
ALLOCA = @ALLOCA@
ALTIVEC_CFLAGS = @ALTIVEC_CFLAGS@
AMTAR = @AMTAR@
AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
AR = @AR@
AS = @AS@
AUTOCONF = @AUTOCONF@
AUTOHEADER = @AUTOHEADER@
AUTOMAKE = @AUTOMAKE@
AVX2_CFLAGS = @AVX2_CFLAGS@
AVX512_CFLAGS = @AVX512_CFLAGS@
AVX_128_FMA_CFLAGS = @AVX_128_FMA_CFLAGS@
AVX_CFLAGS = @AVX_CFLAGS@
AWK = @AWK@
CC = @CC@
CCDEPMODE = @CCDEPMODE@
CFLAGS = @CFLAGS@
CHECK_PL_OPTS = @CHECK_PL_OPTS@
CPP = @CPP@
CPPFLAGS = @CPPFLAGS@
CYGPATH_W = @CYGPATH_W@
C_FFTW_R2R_KIND = @C_FFTW_R2R_KIND@
C_MPI_FINT = @C_MPI_FINT@
DEFS = @DEFS@
DEPDIR = @DEPDIR@
DLLTOOL = @DLLTOOL@
DSYMUTIL = @DSYMUTIL@
DUMPBIN = @DUMPBIN@
ECHO_C = @ECHO_C@
ECHO_N = @ECHO_N@
ECHO_T = @ECHO_T@
EGREP = @EGREP@
EXEEXT = @EXEEXT@
F77 = @F77@
FFLAGS = @FFLAGS@
FGREP = @FGREP@
FLIBS = @FLIBS@
GREP = @GREP@
INDENT = @INDENT@
INSTALL = @INSTALL@
INSTALL_DATA = @INSTALL_DATA@
INSTALL_PROGRAM = @INSTALL_PROGRAM@
INSTALL_SCRIPT = @INSTALL_SCRIPT@
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
KCVI_CFLAGS = @KCVI_CFLAGS@
LD = @LD@
LDFLAGS = @LDFLAGS@
LIBOBJS = @LIBOBJS@
LIBQUADMATH = @LIBQUADMATH@
LIBS = @LIBS@
LIBTOOL = @LIBTOOL@
LIPO = @LIPO@
LN_S = @LN_S@
LTLIBOBJS = @LTLIBOBJS@
LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@
MAINT = @MAINT@
MAKEINFO = @MAKEINFO@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MPICC = @MPICC@
MPILIBS = @MPILIBS@
MPIRUN = @MPIRUN@
NEON_CFLAGS = @NEON_CFLAGS@
NM = @NM@
NMEDIT = @NMEDIT@
OBJDUMP = @OBJDUMP@
OBJEXT = @OBJEXT@
OCAMLBUILD = @OCAMLBUILD@
OPENMP_CFLAGS = @OPENMP_CFLAGS@
OTOOL = @OTOOL@
OTOOL64 = @OTOOL64@
PACKAGE = @PACKAGE@
PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
PACKAGE_NAME = @PACKAGE_NAME@
PACKAGE_STRING = @PACKAGE_STRING@
PACKAGE_TARNAME = @PACKAGE_TARNAME@
PACKAGE_URL = @PACKAGE_URL@
PACKAGE_VERSION = @PACKAGE_VERSION@
PATH_SEPARATOR = @PATH_SEPARATOR@
POW_LIB = @POW_LIB@
PRECISION = @PRECISION@
PREC_SUFFIX = @PREC_SUFFIX@
PTHREAD_CC = @PTHREAD_CC@
PTHREAD_CFLAGS = @PTHREAD_CFLAGS@
PTHREAD_LIBS = @PTHREAD_LIBS@
RANLIB = @RANLIB@
SED = @SED@
SET_MAKE = @SET_MAKE@
SHARED_VERSION_INFO = @SHARED_VERSION_INFO@
SHELL = @SHELL@
SSE2_CFLAGS = @SSE2_CFLAGS@
STACK_ALIGN_CFLAGS = @STACK_ALIGN_CFLAGS@
STRIP = @STRIP@
THREADLIBS = @THREADLIBS@
VERSION = @VERSION@
VSX_CFLAGS = @VSX_CFLAGS@
abs_builddir = @abs_builddir@
abs_srcdir = @abs_srcdir@
abs_top_builddir = @abs_top_builddir@
abs_top_srcdir = @abs_top_srcdir@
ac_ct_AR = @ac_ct_AR@
ac_ct_CC = @ac_ct_CC@
ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
ac_ct_F77 = @ac_ct_F77@
acx_pthread_config = @acx_pthread_config@
am__include = @am__include@
am__leading_dot = @am__leading_dot@
am__quote = @am__quote@
am__tar = @am__tar@
am__untar = @am__untar@
bindir = @bindir@
build = @build@
build_alias = @build_alias@
build_cpu = @build_cpu@
build_os = @build_os@
build_vendor = @build_vendor@
builddir = @builddir@
datadir = @datadir@
datarootdir = @datarootdir@
docdir = @docdir@
dvidir = @dvidir@
exec_prefix = @exec_prefix@
host = @host@
host_alias = @host_alias@
host_cpu = @host_cpu@
host_os = @host_os@
host_vendor = @host_vendor@
htmldir = @htmldir@
includedir = @includedir@
infodir = @infodir@
install_sh = @install_sh@
libdir = @libdir@
libexecdir = @libexecdir@
localedir = @localedir@
localstatedir = @localstatedir@
mandir = @mandir@
mkdir_p = @mkdir_p@
oldincludedir = @oldincludedir@
pdfdir = @pdfdir@
prefix = @prefix@
program_transform_name = @program_transform_name@
psdir = @psdir@
runstatedir = @runstatedir@
sbindir = @sbindir@
sharedstatedir = @sharedstatedir@
srcdir = @srcdir@
sysconfdir = @sysconfdir@
target_alias = @target_alias@
top_build_prefix = @top_build_prefix@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
AM_CPPFLAGS = -I $(top_srcdir)
noinst_LTLIBRARIES = libkernel.la
libkernel_la_SOURCES = align.c alloc.c assert.c awake.c buffered.c \
cpy1d.c cpy2d-pair.c cpy2d.c ct.c debug.c extract-reim.c hash.c iabs.c \
kalloc.c md5-1.c md5.c minmax.c ops.c pickdim.c plan.c planner.c \
primes.c print.c problem.c rader.c scan.c solver.c solvtab.c stride.c \
tensor.c tensor1.c tensor2.c tensor3.c tensor4.c tensor5.c tensor7.c \
tensor8.c tensor9.c tile2d.c timer.c transpose.c trig.c twiddle.c \
cycle.h ifftw.h
all: all-am
.SUFFIXES:
.SUFFIXES: .c .lo .o .obj
$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
*$$dep*) \
( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
&& { if test -f $@; then exit 0; else break; fi; }; \
exit 1;; \
esac; \
done; \
echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu kernel/Makefile'; \
$(am__cd) $(top_srcdir) && \
$(AUTOMAKE) --gnu kernel/Makefile
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
@case '$?' in \
*config.status*) \
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
*) \
echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \
cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \
esac;
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(am__aclocal_m4_deps):
clean-noinstLTLIBRARIES:
-test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES)
@list='$(noinst_LTLIBRARIES)'; \
locs=`for p in $$list; do echo $$p; done | \
sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \
sort -u`; \
test -z "$$locs" || { \
echo rm -f $${locs}; \
rm -f $${locs}; \
}
libkernel.la: $(libkernel_la_OBJECTS) $(libkernel_la_DEPENDENCIES) $(EXTRA_libkernel_la_DEPENDENCIES)
$(AM_V_CCLD)$(LINK) $(libkernel_la_OBJECTS) $(libkernel_la_LIBADD) $(LIBS)
mostlyclean-compile:
-rm -f *.$(OBJEXT)
distclean-compile:
-rm -f *.tab.c
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/align.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/alloc.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/assert.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/awake.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/buffered.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cpy1d.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cpy2d-pair.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cpy2d.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ct.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/debug.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/extract-reim.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hash.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/iabs.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/kalloc.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/md5-1.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/md5.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/minmax.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ops.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pickdim.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/plan.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/planner.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/primes.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/print.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/problem.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rader.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/scan.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/solver.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/solvtab.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/stride.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tensor.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tensor1.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tensor2.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tensor3.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tensor4.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tensor5.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tensor7.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tensor8.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tensor9.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tile2d.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/timer.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/transpose.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/trig.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/twiddle.Plo@am__quote@ # am--include-marker
$(am__depfiles_remade):
@$(MKDIR_P) $(@D)
@echo '# dummy' >$@-t && $(am__mv) $@-t $@
am--depfiles: $(am__depfiles_remade)
.c.o:
@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $<
.c.obj:
@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
.c.lo:
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $<
mostlyclean-libtool:
-rm -f *.lo
clean-libtool:
-rm -rf .libs _libs
ID: $(am__tagged_files)
$(am__define_uniq_tagged_files); mkid -fID $$unique
tags: tags-am
TAGS: tags
tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
set x; \
here=`pwd`; \
$(am__define_uniq_tagged_files); \
shift; \
if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
test -n "$$unique" || unique=$$empty_fix; \
if test $$# -gt 0; then \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
"$$@" $$unique; \
else \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
$$unique; \
fi; \
fi
ctags: ctags-am
CTAGS: ctags
ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
$(am__define_uniq_tagged_files); \
test -z "$(CTAGS_ARGS)$$unique" \
|| $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
$$unique
GTAGS:
here=`$(am__cd) $(top_builddir) && pwd` \
&& $(am__cd) $(top_srcdir) \
&& gtags -i $(GTAGS_ARGS) "$$here"
cscopelist: cscopelist-am
cscopelist-am: $(am__tagged_files)
list='$(am__tagged_files)'; \
case "$(srcdir)" in \
[\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
*) sdir=$(subdir)/$(srcdir) ;; \
esac; \
for i in $$list; do \
if test -f "$$i"; then \
echo "$(subdir)/$$i"; \
else \
echo "$$sdir/$$i"; \
fi; \
done >> $(top_builddir)/cscope.files
distclean-tags:
-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
distdir: $(BUILT_SOURCES)
$(MAKE) $(AM_MAKEFLAGS) distdir-am
distdir-am: $(DISTFILES)
@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
list='$(DISTFILES)'; \
dist_files=`for file in $$list; do echo $$file; done | \
sed -e "s|^$$srcdirstrip/||;t" \
-e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
case $$dist_files in \
*/*) $(MKDIR_P) `echo "$$dist_files" | \
sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
sort -u` ;; \
esac; \
for file in $$dist_files; do \
if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
if test -d $$d/$$file; then \
dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
if test -d "$(distdir)/$$file"; then \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
else \
test -f "$(distdir)/$$file" \
|| cp -p $$d/$$file "$(distdir)/$$file" \
|| exit 1; \
fi; \
done
check-am: all-am
check: check-am
all-am: Makefile $(LTLIBRARIES)
installdirs:
install: install-am
install-exec: install-exec-am
install-data: install-data-am
uninstall: uninstall-am
install-am: all-am
@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
installcheck: installcheck-am
install-strip:
if test -z '$(STRIP)'; then \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
install; \
else \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
"INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
fi
mostlyclean-generic:
clean-generic:
distclean-generic:
-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
maintainer-clean-generic:
@echo "This command is intended for maintainers to use"
@echo "it deletes files that may require special tools to rebuild."
clean: clean-am
clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \
mostlyclean-am
distclean: distclean-am
-rm -f ./$(DEPDIR)/align.Plo
-rm -f ./$(DEPDIR)/alloc.Plo
-rm -f ./$(DEPDIR)/assert.Plo
-rm -f ./$(DEPDIR)/awake.Plo
-rm -f ./$(DEPDIR)/buffered.Plo
-rm -f ./$(DEPDIR)/cpy1d.Plo
-rm -f ./$(DEPDIR)/cpy2d-pair.Plo
-rm -f ./$(DEPDIR)/cpy2d.Plo
-rm -f ./$(DEPDIR)/ct.Plo
-rm -f ./$(DEPDIR)/debug.Plo
-rm -f ./$(DEPDIR)/extract-reim.Plo
-rm -f ./$(DEPDIR)/hash.Plo
-rm -f ./$(DEPDIR)/iabs.Plo
-rm -f ./$(DEPDIR)/kalloc.Plo
-rm -f ./$(DEPDIR)/md5-1.Plo
-rm -f ./$(DEPDIR)/md5.Plo
-rm -f ./$(DEPDIR)/minmax.Plo
-rm -f ./$(DEPDIR)/ops.Plo
-rm -f ./$(DEPDIR)/pickdim.Plo
-rm -f ./$(DEPDIR)/plan.Plo
-rm -f ./$(DEPDIR)/planner.Plo
-rm -f ./$(DEPDIR)/primes.Plo
-rm -f ./$(DEPDIR)/print.Plo
-rm -f ./$(DEPDIR)/problem.Plo
-rm -f ./$(DEPDIR)/rader.Plo
-rm -f ./$(DEPDIR)/scan.Plo
-rm -f ./$(DEPDIR)/solver.Plo
-rm -f ./$(DEPDIR)/solvtab.Plo
-rm -f ./$(DEPDIR)/stride.Plo
-rm -f ./$(DEPDIR)/tensor.Plo
-rm -f ./$(DEPDIR)/tensor1.Plo
-rm -f ./$(DEPDIR)/tensor2.Plo
-rm -f ./$(DEPDIR)/tensor3.Plo
-rm -f ./$(DEPDIR)/tensor4.Plo
-rm -f ./$(DEPDIR)/tensor5.Plo
-rm -f ./$(DEPDIR)/tensor7.Plo
-rm -f ./$(DEPDIR)/tensor8.Plo
-rm -f ./$(DEPDIR)/tensor9.Plo
-rm -f ./$(DEPDIR)/tile2d.Plo
-rm -f ./$(DEPDIR)/timer.Plo
-rm -f ./$(DEPDIR)/transpose.Plo
-rm -f ./$(DEPDIR)/trig.Plo
-rm -f ./$(DEPDIR)/twiddle.Plo
-rm -f Makefile
distclean-am: clean-am distclean-compile distclean-generic \
distclean-tags
dvi: dvi-am
dvi-am:
html: html-am
html-am:
info: info-am
info-am:
install-data-am:
install-dvi: install-dvi-am
install-dvi-am:
install-exec-am:
install-html: install-html-am
install-html-am:
install-info: install-info-am
install-info-am:
install-man:
install-pdf: install-pdf-am
install-pdf-am:
install-ps: install-ps-am
install-ps-am:
installcheck-am:
maintainer-clean: maintainer-clean-am
-rm -f ./$(DEPDIR)/align.Plo
-rm -f ./$(DEPDIR)/alloc.Plo
-rm -f ./$(DEPDIR)/assert.Plo
-rm -f ./$(DEPDIR)/awake.Plo
-rm -f ./$(DEPDIR)/buffered.Plo
-rm -f ./$(DEPDIR)/cpy1d.Plo
-rm -f ./$(DEPDIR)/cpy2d-pair.Plo
-rm -f ./$(DEPDIR)/cpy2d.Plo
-rm -f ./$(DEPDIR)/ct.Plo
-rm -f ./$(DEPDIR)/debug.Plo
-rm -f ./$(DEPDIR)/extract-reim.Plo
-rm -f ./$(DEPDIR)/hash.Plo
-rm -f ./$(DEPDIR)/iabs.Plo
-rm -f ./$(DEPDIR)/kalloc.Plo
-rm -f ./$(DEPDIR)/md5-1.Plo
-rm -f ./$(DEPDIR)/md5.Plo
-rm -f ./$(DEPDIR)/minmax.Plo
-rm -f ./$(DEPDIR)/ops.Plo
-rm -f ./$(DEPDIR)/pickdim.Plo
-rm -f ./$(DEPDIR)/plan.Plo
-rm -f ./$(DEPDIR)/planner.Plo
-rm -f ./$(DEPDIR)/primes.Plo
-rm -f ./$(DEPDIR)/print.Plo
-rm -f ./$(DEPDIR)/problem.Plo
-rm -f ./$(DEPDIR)/rader.Plo
-rm -f ./$(DEPDIR)/scan.Plo
-rm -f ./$(DEPDIR)/solver.Plo
-rm -f ./$(DEPDIR)/solvtab.Plo
-rm -f ./$(DEPDIR)/stride.Plo
-rm -f ./$(DEPDIR)/tensor.Plo
-rm -f ./$(DEPDIR)/tensor1.Plo
-rm -f ./$(DEPDIR)/tensor2.Plo
-rm -f ./$(DEPDIR)/tensor3.Plo
-rm -f ./$(DEPDIR)/tensor4.Plo
-rm -f ./$(DEPDIR)/tensor5.Plo
-rm -f ./$(DEPDIR)/tensor7.Plo
-rm -f ./$(DEPDIR)/tensor8.Plo
-rm -f ./$(DEPDIR)/tensor9.Plo
-rm -f ./$(DEPDIR)/tile2d.Plo
-rm -f ./$(DEPDIR)/timer.Plo
-rm -f ./$(DEPDIR)/transpose.Plo
-rm -f ./$(DEPDIR)/trig.Plo
-rm -f ./$(DEPDIR)/twiddle.Plo
-rm -f Makefile
maintainer-clean-am: distclean-am maintainer-clean-generic
mostlyclean: mostlyclean-am
mostlyclean-am: mostlyclean-compile mostlyclean-generic \
mostlyclean-libtool
pdf: pdf-am
pdf-am:
ps: ps-am
ps-am:
uninstall-am:
.MAKE: install-am install-strip
.PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-am clean \
clean-generic clean-libtool clean-noinstLTLIBRARIES \
cscopelist-am ctags ctags-am distclean distclean-compile \
distclean-generic distclean-libtool distclean-tags distdir dvi \
dvi-am html html-am info info-am install install-am \
install-data install-data-am install-dvi install-dvi-am \
install-exec install-exec-am install-html install-html-am \
install-info install-info-am install-man install-pdf \
install-pdf-am install-ps install-ps-am install-strip \
installcheck installcheck-am installdirs maintainer-clean \
maintainer-clean-generic mostlyclean mostlyclean-compile \
mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
tags tags-am uninstall uninstall-am
.PRECIOUS: Makefile
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
.NOEXPORT:

View File

@@ -0,0 +1,41 @@
/*
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include "kernel/ifftw.h"
#if HAVE_SIMD
#  define ALGN 16
#else
   /* disable the alignment machinery, because it will break,
      e.g., if sizeof(R) == 12 (as in long-double/x86) */
#  define ALGN 0
#endif

/*
 * Report the misalignment of P: the address of P modulo ALGN, or 0
 * when the alignment machinery is disabled (ALGN == 0).
 *
 * NONPORTABLE: the result is derived from the integer value obtained
 * by converting the pointer to uintptr_t.
 */
int X(ialignment_of)(R *p)
{
#if ALGN != 0
     return (int)(((uintptr_t) p) % ALGN);
#else
     /* alignment tracking is off: every pointer counts as aligned */
     UNUSED(p);
     return 0;
#endif
}

View File

@@ -0,0 +1,47 @@
/*
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include "kernel/ifftw.h"
/*
 * Allocate N bytes through X(kernel_malloc).
 *
 * A request for 0 bytes is turned into a request for 1 byte, so the
 * underlying allocator is never asked for an empty block.  The result
 * is passed through CK() (presumably the out-of-memory check -- confirm
 * against its definition in ifftw.h) and, when MIN_ALIGNMENT is
 * defined, asserted to be a multiple of MIN_ALIGNMENT.
 */
void *X(malloc_plain)(size_t n)
{
     void *p = X(kernel_malloc)(n ? n : 1);

     CK(p);

#if defined(MIN_ALIGNMENT)
     A((((uintptr_t)p) % MIN_ALIGNMENT) == 0);
#endif

     return p;
}
/*
 * Release the block P by forwarding it to X(kernel_free).
 * P is passed through unchanged; no NULL check is done here
 * (see X(ifree0) for the NULL-tolerant variant).
 */
void X(ifree)(void *p)
{
X(kernel_free)(p);
}
/*
 * NULL-tolerant free: does nothing for a null pointer, otherwise
 * hands P to X(ifree).  Exists because "free it if it is set" is a
 * common pattern among callers.
 */
void X(ifree0)(void *p)
{
     if (!p)
          return;

     X(ifree)(p);
}

View File

@@ -0,0 +1,34 @@
/*
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include "kernel/ifftw.h"
#include <stdio.h>
#include <stdlib.h>
/*
 * Report a failed assertion and terminate the program.
 *
 *   s     text describing the assertion that failed
 *   line  line number to report
 *   file  file name to report
 *
 * stdout is flushed first so that pending normal output is not lost
 * or interleaved after the diagnostic, which goes to stderr.  The
 * process then ends via abort() when HAVE_ABORT is defined, and via
 * exit(EXIT_FAILURE) otherwise.  This function does not return.
 */
void X(assertion_failed)(const char *s, int line, const char *file)
{
     fflush(stdout);
     fprintf(stderr, "fftw: %s:%d: assertion failed: %s\n", file, line, s);

#ifndef HAVE_ABORT
     exit(EXIT_FAILURE);
#else
     abort();
#endif
}

View File

@@ -0,0 +1,29 @@
/*
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include "kernel/ifftw.h"
/*
 * No-op "awake" routine: accepts a plan and a wakefulness state and
 * deliberately ignores both.  The UNUSED() markers are there only to
 * acknowledge the unused parameters.
 */
void X(null_awake)(plan *ego, enum wakefulness wakefulness)
{
UNUSED(ego);
UNUSED(wakefulness);
/* do nothing */
}

View File

@@ -0,0 +1,82 @@
/*
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
/* routines shared by the various buffered solvers */
#include "kernel/ifftw.h"
#define DEFAULT_MAXNBUF ((INT)256)
/* approx. 512KB of buffers for complex data */
#define MAXBUFSZ (256 * 1024 / (INT)(sizeof(R)))

/*
 * Choose how many buffers to use for vl transforms of size n.
 * maxnbuf == 0 selects DEFAULT_MAXNBUF.  The count is capped by maxnbuf,
 * by vl, and by how many size-n buffers fit in MAXBUFSZ (at least one).
 */
INT X(nbuf)(INT n, INT vl, INT maxnbuf)
{
     INT cap, fit, count, lowest, cand;

     cap = maxnbuf ? maxnbuf : DEFAULT_MAXNBUF;
     fit = X(imax)((INT)1, MAXBUFSZ / n);
     count = X(imin)(cap, X(imin)(vl, fit));

     /*
      * Prefer a count (no smaller than a quarter of the cap just computed)
      * that divides vl evenly, so that only one child plan is needed.
      */
     lowest = X(imax)(1, count / 4);
     cand = count;
     while (cand >= lowest) {
          if (vl % cand == 0)
               return cand;
          --cand;
     }

     /* no divisor in range: settle for the capped count */
     return count;
}
#define SKEW 6 /* need to be even for SIMD */
#define SKEWMOD 8

/* Distance between consecutive buffers for transforms of size n.
   A single transform (vl == 1) gets no padding. */
INT X(bufdist)(INT n, INT vl)
{
     if (vl == 1)
          return n;

     /* smallest X such that X >= N and X == SKEW (mod SKEWMOD) */
     return n + X(modulo)(SKEW - n, SKEWMOD);
}
/* nonzero iff a transform of size n exceeds the buffer budget MAXBUFSZ */
int X(toobig)(INT n)
{
     if (n > MAXBUFSZ)
          return 1;
     return 0;
}
/* Returns 1 when some earlier entry maxnbuf[k], k < which, produces the
   same buffer count as maxnbuf[which] for this (n, vl); the caller can
   then skip entry `which' and keep the lowest-indexed equivalent.
   Returns 0 otherwise.  nmaxnbuf is not consulted. */
int X(nbuf_redundant)(INT n, INT vl, size_t which,
                      const INT *maxnbuf, size_t nmaxnbuf)
{
     size_t k = 0;
     (void)nmaxnbuf; /* UNUSED */

     while (k < which) {
          if (X(nbuf)(n, vl, maxnbuf[k]) == X(nbuf)(n, vl, maxnbuf[which]))
               return 1;
          ++k;
     }
     return 0;
}

View File

@@ -0,0 +1,70 @@
/*
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
/* out of place 1D copy routine */
#include "kernel/ifftw.h"
/*
 * Copy n0 groups of vl consecutive elements from I to O.  Group k is read
 * starting at I + k * is0 and written starting at O + k * os0.
 * I and O must be different pointers (asserted below).
 * vl == 1, 2 and 4 have dedicated loops; any other vl uses the generic
 * double loop in the default case.
 */
void X(cpy1d)(R *I, R *O, INT n0, INT is0, INT os0, INT vl)
{
INT i0, v;
A(I != O);
switch (vl) {
case 1:
/* odd count or non-unit stride: plain element-by-element copy */
if ((n0 & 1) || is0 != 1 || os0 != 1) {
for (; n0 > 0; --n0, I += is0, O += os0)
*O = *I;
break;
}
/* contiguous with even count: treat the data as n0/2 pairs and let
   the vl == 2 case copy it */
n0 /= 2; is0 = 2; os0 = 2;
/* fall through */
case 2:
/* odd count or pairs not back-to-back: copy one pair per iteration */
if ((n0 & 1) || is0 != 2 || os0 != 2) {
for (; n0 > 0; --n0, I += is0, O += os0) {
R x0 = I[0];
R x1 = I[1];
O[0] = x0;
O[1] = x1;
}
break;
}
/* contiguous pairs with even count: treat as n0/2 quadruples and let
   the vl == 4 case copy it */
n0 /= 2; is0 = 4; os0 = 4;
/* fall through */
case 4:
/* copy four elements per iteration; all loads precede the stores */
for (; n0 > 0; --n0, I += is0, O += os0) {
R x0 = I[0];
R x1 = I[1];
R x2 = I[2];
R x3 = I[3];
O[0] = x0;
O[1] = x1;
O[2] = x2;
O[3] = x3;
}
break;
default:
/* generic vector length */
for (i0 = 0; i0 < n0; ++i0)
for (v = 0; v < vl; ++v) {
R x0 = I[i0 * is0 + v];
O[i0 * os0 + v] = x0;
}
break;
}
}

View File

@@ -0,0 +1,68 @@
/*
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
/* out of place copy routines for pairs of isomorphic 2D arrays */
#include "kernel/ifftw.h"
/*
 * Copy two n0 x n1 arrays at once: I0 -> O0 and I1 -> O1.  Both inputs
 * share the strides (is0, is1) and both outputs share (os0, os1).
 * Dimension 0 is the inner loop.
 */
void X(cpy2d_pair)(R *I0, R *I1, R *O0, R *O1,
                   INT n0, INT is0, INT os0,
                   INT n1, INT is1, INT os1)
{
     INT outer, inner;

     for (outer = 0; outer < n1; ++outer) {
          for (inner = 0; inner < n0; ++inner) {
               INT src = inner * is0 + outer * is1;
               INT dst = inner * os0 + outer * os1;
               /* read both inputs before writing either output */
               R a = I0[src];
               R b = I1[src];
               O0[dst] = a;
               O1[dst] = b;
          }
     }
}
/* Store zero into n0 elements of O0 and of O1, both with stride os0. */
void X(zero1d_pair)(R *O0, R *O1, INT n0, INT os0)
{
     INT k;

     for (k = 0; k < n0; ++k) {
          INT off = k * os0;
          O0[off] = 0;
          O1[off] = 0;
     }
}
/* Same as cpy2d_pair, but the dimension with the smaller absolute input
   stride is made the inner loop, so input is read contiguously when
   possible. */
void X(cpy2d_pair_ci)(R *I0, R *I1, R *O0, R *O1,
                      INT n0, INT is0, INT os0,
                      INT n1, INT is1, INT os1)
{
     if (IABS(is0) >= IABS(is1)) {
          /* swap the dimensions: the inner loop runs over n1 */
          X(cpy2d_pair) (I0, I1, O0, O1, n1, is1, os1, n0, is0, os0);
          return;
     }

     /* inner loop is for n0 */
     X(cpy2d_pair) (I0, I1, O0, O1, n0, is0, os0, n1, is1, os1);
}
/* Same as cpy2d_pair, but the dimension with the smaller absolute output
   stride is made the inner loop, so output is written contiguously when
   possible. */
void X(cpy2d_pair_co)(R *I0, R *I1, R *O0, R *O1,
                      INT n0, INT is0, INT os0,
                      INT n1, INT is1, INT os1)
{
     if (IABS(os0) >= IABS(os1)) {
          /* swap the dimensions: the inner loop runs over n1 */
          X(cpy2d_pair) (I0, I1, O0, O1, n1, is1, os1, n0, is0, os0);
          return;
     }

     /* inner loop is for n0 */
     X(cpy2d_pair) (I0, I1, O0, O1, n0, is0, os0, n1, is1, os1);
}

207
fftw-3.3.10/kernel/cpy2d.c Normal file
View File

@@ -0,0 +1,207 @@
/*
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
/* out of place 2D copy routines */
#include "kernel/ifftw.h"
/* On x86-64 with <xmmintrin.h> available, WIDE_TYPE is the 128-bit
   __m128 vector type, used below to move two R's with one load/store. */
#if defined(__x86_64__) || defined(_M_X64) || defined(_M_AMD64)
# ifdef HAVE_XMMINTRIN_H
# include <xmmintrin.h>
# define WIDE_TYPE __m128
# endif
#endif
#ifndef WIDE_TYPE
/* fall back to double, which means that WIDE_TYPE will be unused */
# define WIDE_TYPE double
#endif
/*
 * Copy an n0 x n1 array of vl-element vectors from I to O.
 * Element (i0, i1, v) is read at I[i0 * is0 + i1 * is1 + v] and written
 * at O[i0 * os0 + i1 * os1 + v].  Dimension 0 is always the inner loop.
 * vl == 1 and vl == 2 have dedicated loops; other values use the generic
 * triple loop.
 */
void X(cpy2d)(R *I, R *O,
INT n0, INT is0, INT os0,
INT n1, INT is1, INT os1,
INT vl)
{
INT i0, i1, v;
switch (vl) {
case 1:
for (i1 = 0; i1 < n1; ++i1)
for (i0 = 0; i0 < n0; ++i0) {
R x0 = I[i0 * is0 + i1 * is1];
O[i0 * os0 + i1 * os1] = x0;
}
break;
case 2:
/* The size tests below are compile-time constants; the pointer and
   stride tests check at run time that every pair is aligned for the
   wider access (even strides keep the base alignment). */
if (1
&& (2 * sizeof(R) == sizeof(WIDE_TYPE))
&& (sizeof(WIDE_TYPE) > sizeof(double))
&& (((size_t)I) % sizeof(WIDE_TYPE) == 0)
&& (((size_t)O) % sizeof(WIDE_TYPE) == 0)
&& ((is0 & 1) == 0)
&& ((is1 & 1) == 0)
&& ((os0 & 1) == 0)
&& ((os1 & 1) == 0)) {
/* copy R[2] as WIDE_TYPE if WIDE_TYPE is large
enough to hold R[2], and if the input is
properly aligned. This is a win when R==double
and WIDE_TYPE is 128 bits. */
for (i1 = 0; i1 < n1; ++i1)
for (i0 = 0; i0 < n0; ++i0) {
*(WIDE_TYPE *)&O[i0 * os0 + i1 * os1] =
*(WIDE_TYPE *)&I[i0 * is0 + i1 * is1];
}
} else if (1
&& (2 * sizeof(R) == sizeof(double))
&& (((size_t)I) % sizeof(double) == 0)
&& (((size_t)O) % sizeof(double) == 0)
&& ((is0 & 1) == 0)
&& ((is1 & 1) == 0)
&& ((os0 & 1) == 0)
&& ((os1 & 1) == 0)) {
/* copy R[2] as double if double is large enough to
hold R[2], and if the input is properly aligned.
This case applies when R==float */
for (i1 = 0; i1 < n1; ++i1)
for (i0 = 0; i0 < n0; ++i0) {
*(double *)&O[i0 * os0 + i1 * os1] =
*(double *)&I[i0 * is0 + i1 * is1];
}
} else {
/* no wide access possible: copy the pair element by element */
for (i1 = 0; i1 < n1; ++i1)
for (i0 = 0; i0 < n0; ++i0) {
R x0 = I[i0 * is0 + i1 * is1];
R x1 = I[i0 * is0 + i1 * is1 + 1];
O[i0 * os0 + i1 * os1] = x0;
O[i0 * os0 + i1 * os1 + 1] = x1;
}
}
break;
default:
/* generic vector length */
for (i1 = 0; i1 < n1; ++i1)
for (i0 = 0; i0 < n0; ++i0)
for (v = 0; v < vl; ++v) {
R x0 = I[i0 * is0 + i1 * is1 + v];
O[i0 * os0 + i1 * os1 + v] = x0;
}
break;
}
}
/* Same as cpy2d, but the dimension with the smaller absolute input stride
   is made the inner loop, so input is read contiguously when possible. */
void X(cpy2d_ci)(R *I, R *O,
                 INT n0, INT is0, INT os0,
                 INT n1, INT is1, INT os1,
                 INT vl)
{
     if (IABS(is0) >= IABS(is1)) {
          /* swap the dimensions: the inner loop runs over n1 */
          X(cpy2d) (I, O, n1, is1, os1, n0, is0, os0, vl);
          return;
     }

     /* inner loop is for n0 */
     X(cpy2d) (I, O, n0, is0, os0, n1, is1, os1, vl);
}
/* Same as cpy2d, but the dimension with the smaller absolute output stride
   is made the inner loop, so output is written contiguously when possible. */
void X(cpy2d_co)(R *I, R *O,
                 INT n0, INT is0, INT os0,
                 INT n1, INT is1, INT os1,
                 INT vl)
{
     if (IABS(os0) >= IABS(os1)) {
          /* swap the dimensions: the inner loop runs over n1 */
          X(cpy2d) (I, O, n1, is1, os1, n0, is0, os0, vl);
          return;
     }

     /* inner loop is for n0 */
     X(cpy2d) (I, O, n0, is0, os0, n1, is1, os1, vl);
}
/* tiled copy routines */
/* Arguments of a 2D copy, bundled so they can be passed through the
   void* parameter of the per-tile callbacks dotile and dotile_buf. */
struct cpy2d_closure {
R *I, *O; /* base of the whole input and output arrays */
INT is0, os0, is1, os1, vl; /* input/output strides per dimension, vector length */
R *buf; /* staging buffer used by dotile_buf; 0 when unused */
};
/* Tile callback: copy the sub-rectangle [n0l, n0u) x [n1l, n1u) straight
   from input to output.  args points to a struct cpy2d_closure. */
static void dotile(INT n0l, INT n0u, INT n1l, INT n1u, void *args)
{
     struct cpy2d_closure *k = (struct cpy2d_closure *)args;
     R *src = k->I + n0l * k->is0 + n1l * k->is1;
     R *dst = k->O + n0l * k->os0 + n1l * k->os1;

     X(cpy2d)(src, dst,
              n0u - n0l, k->is0, k->os0,
              n1u - n1l, k->is1, k->os1,
              k->vl);
}
/* Tile callback: copy the sub-rectangle [n0l, n0u) x [n1l, n1u) in two
   steps through k->buf, where the tile is stored densely (stride vl along
   dimension 0, vl * tile-width along dimension 1). */
static void dotile_buf(INT n0l, INT n0u, INT n1l, INT n1u, void *args)
{
     struct cpy2d_closure *k = (struct cpy2d_closure *)args;
     INT w0 = n0u - n0l;      /* tile extent along dimension 0 */
     INT w1 = n1u - n1l;      /* tile extent along dimension 1 */
     INT bs0 = k->vl;         /* buffer stride, dimension 0 */
     INT bs1 = k->vl * w0;    /* buffer stride, dimension 1 */

     /* copy from I to buf */
     X(cpy2d_ci)(k->I + n0l * k->is0 + n1l * k->is1,
                 k->buf,
                 w0, k->is0, bs0,
                 w1, k->is1, bs1,
                 k->vl);

     /* copy from buf to O */
     X(cpy2d_co)(k->buf,
                 k->O + n0l * k->os0 + n1l * k->os1,
                 w0, bs0, k->os0,
                 w1, bs1, k->os1,
                 k->vl);
}
/* 2D copy done tile by tile, each tile copied directly from I to O. */
void X(cpy2d_tiled)(R *I, R *O,
                    INT n0, INT is0, INT os0,
                    INT n1, INT is1, INT os1, INT vl)
{
     struct cpy2d_closure k;
     /* two arrays are touched per tile: the input and the output */
     INT tilesz = X(compute_tilesz)(vl, 1 + 1);

     k.buf = 0; /* unused */
     k.vl = vl;
     k.I = I;
     k.is0 = is0;
     k.is1 = is1;
     k.O = O;
     k.os0 = os0;
     k.os1 = os1;

     X(tile2d)(0, n0, 0, n1, tilesz, dotile, &k);
}
/* 2D copy done tile by tile, each tile staged through an on-stack buffer. */
void X(cpy2d_tiledbuf)(R *I, R *O,
                       INT n0, INT is0, INT os0,
                       INT n1, INT is1, INT os1, INT vl)
{
     struct cpy2d_closure k;
     R buf[CACHESIZE / (2 * sizeof(R))];
     /* either input and buffer, or output and buffer, are in cache
        at any one time */
     INT tilesz = X(compute_tilesz)(vl, 2);

     /* a full tile must fit in the staging buffer */
     A(tilesz * tilesz * vl * sizeof(R) <= sizeof(buf));

     k.buf = buf;
     k.vl = vl;
     k.I = I;
     k.is0 = is0;
     k.is1 = is1;
     k.O = O;
     k.os0 = os0;
     k.os1 = os1;

     X(tile2d)(0, n0, 0, n1, tilesz, dotile_buf, &k);
}

31
fftw-3.3.10/kernel/ct.c Normal file
View File

@@ -0,0 +1,31 @@
/*
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
/* common routines for Cooley-Tukey algorithms */
#include "kernel/ifftw.h"
#define POW2P(n) (((n) > 0) && (((n) & ((n) - 1)) == 0))

/* TRUE if radix-r is ugly for size n: either n does not exceed min_n, or
   n is a power of two and v * (n / r) is at most 4. */
int X(ct_uglyp)(INT min_n, INT v, INT n, INT r)
{
     if (n <= min_n)
          return 1;
     if (!POW2P(n))
          return 0;
     return (v * (n / r)) <= 4;
}

564
fftw-3.3.10/kernel/cycle.h Normal file
View File

@@ -0,0 +1,564 @@
/*
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
/* machine-dependent cycle counters code. Needs to be inlined. */
/***************************************************************************/
/* To use the cycle counters in your code, simply #include "cycle.h" (this
file), and then use the functions/macros:
ticks getticks(void);
ticks is an opaque typedef defined below, representing the current time.
You extract the elapsed time between two calls to getticks() via:
double elapsed(ticks t1, ticks t0);
which returns a double-precision variable in arbitrary units. You
are not expected to convert this into human units like seconds; it
is intended only for *comparisons* of time intervals.
(In order to use some of the OS-dependent timer routines like
Solaris' gethrtime, you need to paste the autoconf snippet below
into your configure.ac file and #include "config.h" before cycle.h,
or define the relevant macros manually if you are not using autoconf.)
*/
/***************************************************************************/
/* This file uses macros like HAVE_GETHRTIME that are assumed to be
defined according to whether the corresponding function/type/header
is available on your system. The necessary macros are most
conveniently defined if you are using GNU autoconf, via the tests:
dnl ---------------------------------------------------------------------
AC_C_INLINE
AC_HEADER_TIME
AC_CHECK_HEADERS([sys/time.h c_asm.h intrinsics.h mach/mach_time.h])
AC_CHECK_TYPE([hrtime_t],[AC_DEFINE(HAVE_HRTIME_T, 1, [Define to 1 if hrtime_t is defined in <sys/time.h>])],,[#if HAVE_SYS_TIME_H
#include <sys/time.h>
#endif])
AC_CHECK_FUNCS([gethrtime read_real_time time_base_to_time clock_gettime mach_absolute_time])
dnl Cray UNICOS _rtc() (real-time clock) intrinsic
AC_MSG_CHECKING([for _rtc intrinsic])
rtc_ok=yes
AC_TRY_LINK([#ifdef HAVE_INTRINSICS_H
#include <intrinsics.h>
#endif], [_rtc()], [AC_DEFINE(HAVE__RTC,1,[Define if you have the UNICOS _rtc() intrinsic.])], [rtc_ok=no])
AC_MSG_RESULT($rtc_ok)
dnl ---------------------------------------------------------------------
*/
/***************************************************************************/
#if TIME_WITH_SYS_TIME
# include <sys/time.h>
# include <time.h>
#else
# if HAVE_SYS_TIME_H
# include <sys/time.h>
# else
# include <time.h>
# endif
#endif
/* Expands to a definition of elapsed(t1, t0) for tick types that convert
   directly to double: the result is t1 - t0, computed in double.
   INL is the spelling of the inline keyword to use for this compiler. */
#define INLINE_ELAPSED(INL) static INL double elapsed(ticks t1, ticks t0) \
{ \
return (double)t1 - (double)t0; \
}
/*----------------------------------------------------------------*/
/* Solaris */
/* ticks is hrtime_t and getticks is simply an alias for gethrtime;
   elapsed() is the plain difference from INLINE_ELAPSED. */
#if defined(HAVE_GETHRTIME) && defined(HAVE_HRTIME_T) && !defined(HAVE_TICK_COUNTER)
typedef hrtime_t ticks;
#define getticks gethrtime
INLINE_ELAPSED(inline)
#define HAVE_TICK_COUNTER
#endif
/*----------------------------------------------------------------*/
/* AIX v. 4+ routines to read the real-time clock or time-base register */
#if defined(HAVE_READ_REAL_TIME) && defined(HAVE_TIME_BASE_TO_TIME) && !defined(HAVE_TICK_COUNTER)
typedef timebasestruct_t ticks;
/* Return the raw value filled in by read_real_time(). */
static __inline ticks getticks(void)
{
ticks t;
read_real_time(&t, TIMEBASE_SZ);
return t;
}
/* t1 and t0 are by-value copies, so converting them in place with
   time_base_to_time() does not modify the caller's values.  After the
   conversion tb_high is scaled by 1e9 and added to tb_low. */
static __inline double elapsed(ticks t1, ticks t0) /* time in nanoseconds */
{
time_base_to_time(&t1, TIMEBASE_SZ);
time_base_to_time(&t0, TIMEBASE_SZ);
return (((double)t1.tb_high - (double)t0.tb_high) * 1.0e9 +
((double)t1.tb_low - (double)t0.tb_low));
}
#define HAVE_TICK_COUNTER
#endif
/*----------------------------------------------------------------*/
/*
* PowerPC ``cycle'' counter using the time base register.
*/
#if ((((defined(__GNUC__) && (defined(__powerpc__) || defined(__ppc__))) || (defined(__MWERKS__) && defined(macintosh)))) || (defined(__IBM_GCC_ASM) && (defined(__powerpc__) || defined(__ppc__)))) && !defined(HAVE_TICK_COUNTER)
typedef unsigned long long ticks;
/* The 64-bit time base is read as two 32-bit halves.  The upper half is
   read before and after the lower half, and the sequence is repeated
   until both upper reads agree, so the combined value is consistent. */
static __inline__ ticks getticks(void)
{
unsigned int tbl, tbu0, tbu1;
do {
__asm__ __volatile__ ("mftbu %0" : "=r"(tbu0));
__asm__ __volatile__ ("mftb %0" : "=r"(tbl));
__asm__ __volatile__ ("mftbu %0" : "=r"(tbu1));
} while (tbu0 != tbu1);
return (((unsigned long long)tbu0) << 32) | tbl;
}
INLINE_ELAPSED(__inline__)
#define HAVE_TICK_COUNTER
#endif
/* MacOS/Mach (Darwin) time-base register interface (unlike UpTime,
from Carbon, requires no additional libraries to be linked). */
/* getticks is an alias for mach_absolute_time; ticks is uint64_t. */
#if defined(HAVE_MACH_ABSOLUTE_TIME) && defined(HAVE_MACH_MACH_TIME_H) && !defined(HAVE_TICK_COUNTER)
#include <mach/mach_time.h>
typedef uint64_t ticks;
#define getticks mach_absolute_time
INLINE_ELAPSED(__inline__)
#define HAVE_TICK_COUNTER
#endif
/*----------------------------------------------------------------*/
/*
* Pentium cycle counter
*/
#if (defined(__GNUC__) || defined(__ICC)) && defined(__i386__) && !defined(HAVE_TICK_COUNTER)
typedef unsigned long long ticks;
/* Read the time-stamp counter with rdtsc; the "=A" output constraint
   binds the 64-bit result to the edx:eax register pair on i386. */
static __inline__ ticks getticks(void)
{
ticks ret;
__asm__ __volatile__("rdtsc": "=A" (ret));
/* no input, nothing else clobbered */
return ret;
}
INLINE_ELAPSED(__inline__)
#define HAVE_TICK_COUNTER
#define TIME_MIN 5000.0 /* unreliable pentium IV cycle counter */
#endif
/* Visual C++ -- thanks to Morten Nissov for his help with this */
#if _MSC_VER >= 1200 && _M_IX86 >= 500 && !defined(HAVE_TICK_COUNTER)
#include <windows.h>
typedef LARGE_INTEGER ticks;
/* emits the two opcode bytes 0x0F 0x31 instead of using a mnemonic */
#define RDTSC __asm __emit 0fh __asm __emit 031h /* hack for VC++ 5.0 */
/* Execute RDTSC and store edx/eax into the high/low halves of the
   LARGE_INTEGER. */
static __inline ticks getticks(void)
{
ticks retval;
__asm {
RDTSC
mov retval.HighPart, edx
mov retval.LowPart, eax
}
return retval;
}
/* ticks is a struct here, so the difference is taken on QuadPart rather
   than through INLINE_ELAPSED. */
static __inline double elapsed(ticks t1, ticks t0)
{
return (double)t1.QuadPart - (double)t0.QuadPart;
}
#define HAVE_TICK_COUNTER
#define TIME_MIN 5000.0 /* unreliable pentium IV cycle counter */
#endif
/*----------------------------------------------------------------*/
/*
* X86-64 cycle counter
*/
#if (defined(__GNUC__) || defined(__ICC) || defined(__SUNPRO_C)) && defined(__x86_64__) && !defined(HAVE_TICK_COUNTER)
typedef unsigned long long ticks;
/* rdtsc returns the counter in two 32-bit halves (eax low, edx high),
   which are combined into one 64-bit value. */
static __inline__ ticks getticks(void)
{
unsigned a, d;
__asm__ __volatile__ ("rdtsc" : "=a" (a), "=d" (d));
return ((ticks)a) | (((ticks)d) << 32);
}
INLINE_ELAPSED(__inline__)
#define HAVE_TICK_COUNTER
#define TIME_MIN 5000.0
#endif
/* PGI compiler, courtesy Cristiano Calonaci, Andrea Tarsi, & Roberto Gori.
NOTE: this code will fail to link unless you use the -Masmkeyword compiler
option (grrr). */
#if defined(__PGI) && defined(__x86_64__) && !defined(HAVE_TICK_COUNTER)
typedef unsigned long long ticks;
/* The asm merges edx:eax into rax (shift rdx left by 32, zero-extend eax,
   OR them together).  There is deliberately no return statement: the
   function relies on the value left in rax being taken as its result. */
static ticks getticks(void)
{
asm(" rdtsc; shl $0x20,%rdx; mov %eax,%eax; or %rdx,%rax; ");
}
INLINE_ELAPSED(__inline__)
#define HAVE_TICK_COUNTER
#define TIME_MIN 5000.0
#endif
/* Visual C++, courtesy of Dirk Michaelis */
/* getticks is an alias for the __rdtsc compiler intrinsic. */
#if _MSC_VER >= 1400 && (defined(_M_AMD64) || defined(_M_X64)) && !defined(HAVE_TICK_COUNTER)
#include <intrin.h>
#pragma intrinsic(__rdtsc)
typedef unsigned __int64 ticks;
#define getticks __rdtsc
INLINE_ELAPSED(__inline)
#define HAVE_TICK_COUNTER
#define TIME_MIN 5000.0
#endif
/*----------------------------------------------------------------*/
/*
* IA64 cycle counter
*/
/* intel's icc/ecc compiler */
#if (defined(__EDG_VERSION) || defined(__ECC)) && defined(__ia64__) && !defined(HAVE_TICK_COUNTER)
typedef unsigned long ticks;
#include <ia64intrin.h>
/* Read the ar.itc application register through the __getReg intrinsic. */
static __inline__ ticks getticks(void)
{
return __getReg(_IA64_REG_AR_ITC);
}
INLINE_ELAPSED(__inline__)
#define HAVE_TICK_COUNTER
#endif
/* gcc */
#if defined(__GNUC__) && defined(__ia64__) && !defined(HAVE_TICK_COUNTER)
typedef unsigned long ticks;
/* Read the ar.itc application register with inline assembly. */
static __inline__ ticks getticks(void)
{
ticks ret;
__asm__ __volatile__ ("mov %0=ar.itc" : "=r"(ret));
return ret;
}
INLINE_ELAPSED(__inline__)
#define HAVE_TICK_COUNTER
#endif
/* HP/UX IA64 compiler, courtesy Teresa L. Johnson: */
#if defined(__hpux) && defined(__ia64) && !defined(HAVE_TICK_COUNTER)
#include <machine/sys/inline.h>
typedef unsigned long ticks;
/* Read the ITC application register via the _Asm_mov_from_ar intrinsic. */
static inline ticks getticks(void)
{
ticks ret;
ret = _Asm_mov_from_ar (_AREG_ITC);
return ret;
}
INLINE_ELAPSED(inline)
#define HAVE_TICK_COUNTER
#endif
/* Microsoft Visual C++ */
#if defined(_MSC_VER) && defined(_M_IA64) && !defined(HAVE_TICK_COUNTER)
typedef unsigned __int64 ticks;
/* __getReg is declared by hand here (with C linkage under C++) and then
   marked as a compiler intrinsic. */
# ifdef __cplusplus
extern "C"
# endif
ticks __getReg(int whichReg);
#pragma intrinsic(__getReg)
/* NOTE(review): 3116 is presumably the register number of ar.itc --
   confirm against the compiler's register id table. */
static __inline ticks getticks(void)
{
volatile ticks temp;
temp = __getReg(3116);
return temp;
}
/* NOTE(review): getticks above uses __inline but this passes plain
   `inline' -- confirm that this compiler accepts it. */
INLINE_ELAPSED(inline)
#define HAVE_TICK_COUNTER
#endif
/*----------------------------------------------------------------*/
/*
* PA-RISC cycle counter
*/
#if (defined(__hppa__) || defined(__hppa)) && !defined(HAVE_TICK_COUNTER)
typedef unsigned long ticks;
# ifdef __GNUC__
/* GNU C: read control register 16 with inline assembly. */
static __inline__ ticks getticks(void)
{
ticks ret;
__asm__ __volatile__("mfctl 16, %0": "=r" (ret));
/* no input, nothing else clobbered */
return ret;
}
# else
# include <machine/inline.h>
/* Other compilers: read control register 16 through the _MFCTL macro. */
static inline unsigned long getticks(void)
{
register ticks ret;
_MFCTL(16, ret);
return ret;
}
# endif
INLINE_ELAPSED(inline)
#define HAVE_TICK_COUNTER
#endif
/*----------------------------------------------------------------*/
/* S390, courtesy of James Treacy */
#if defined(__GNUC__) && defined(__s390__) && !defined(HAVE_TICK_COUNTER)
typedef unsigned long long ticks;
/* stck stores the clock value into the memory addressed by its operand;
   the "memory" and "cc" clobbers tell the compiler that `cycles' and the
   condition code are changed by the instruction. */
static __inline__ ticks getticks(void)
{
ticks cycles;
__asm__("stck 0(%0)" : : "a" (&(cycles)) : "memory", "cc");
return cycles;
}
INLINE_ELAPSED(__inline__)
#define HAVE_TICK_COUNTER
#endif
/*----------------------------------------------------------------*/
#if defined(__GNUC__) && defined(__alpha__) && !defined(HAVE_TICK_COUNTER)
/*
* The 32-bit cycle counter on alpha overflows pretty quickly,
* unfortunately. A 1GHz machine overflows in 4 seconds.
*/
typedef unsigned int ticks;
/* Only the low 32 bits of the rpcc result are kept. */
static __inline__ ticks getticks(void)
{
unsigned long cc;
__asm__ __volatile__ ("rpcc %0" : "=r"(cc));
return (cc & 0xFFFFFFFF);
}
INLINE_ELAPSED(__inline__)
#define HAVE_TICK_COUNTER
#endif
/*----------------------------------------------------------------*/
#if defined(__GNUC__) && defined(__sparc_v9__) && !defined(HAVE_TICK_COUNTER)
typedef unsigned long ticks;
/* Read the %tick register with inline assembly. */
static __inline__ ticks getticks(void)
{
ticks ret;
__asm__ __volatile__("rd %%tick, %0" : "=r" (ret));
return ret;
}
INLINE_ELAPSED(__inline__)
#define HAVE_TICK_COUNTER
#endif
/*----------------------------------------------------------------*/
#if (defined(__DECC) || defined(__DECCXX)) && defined(__alpha) && defined(HAVE_C_ASM_H) && !defined(HAVE_TICK_COUNTER)
# include <c_asm.h>
typedef unsigned int ticks;
/* Same rpcc counter as the GNU C alpha variant, read through the asm()
   function from <c_asm.h>; only the low 32 bits are kept. */
static __inline ticks getticks(void)
{
unsigned long cc;
cc = asm("rpcc %v0");
return (cc & 0xFFFFFFFF);
}
INLINE_ELAPSED(__inline)
#define HAVE_TICK_COUNTER
#endif
/*----------------------------------------------------------------*/
/* SGI/Irix */
#if defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_SGI_CYCLE) && !defined(HAVE_TICK_COUNTER) && !defined(__ANDROID__)
typedef struct timespec ticks;
/* Sample the CLOCK_SGI_CYCLE clock; the return value of clock_gettime
   is not checked. */
static inline ticks getticks(void)
{
struct timespec t;
clock_gettime(CLOCK_SGI_CYCLE, &t);
return t;
}
/* Difference of two timespecs: seconds scaled by 1e9 plus the
   nanosecond fields. */
static inline double elapsed(ticks t1, ticks t0)
{
return ((double)t1.tv_sec - (double)t0.tv_sec) * 1.0E9 +
((double)t1.tv_nsec - (double)t0.tv_nsec);
}
#define HAVE_TICK_COUNTER
#endif
/*----------------------------------------------------------------*/
/* Cray UNICOS _rtc() intrinsic function */
/* getticks is an alias for _rtc; ticks is long long. */
#if defined(HAVE__RTC) && !defined(HAVE_TICK_COUNTER)
#ifdef HAVE_INTRINSICS_H
# include <intrinsics.h>
#endif
typedef long long ticks;
#define getticks _rtc
INLINE_ELAPSED(inline)
#define HAVE_TICK_COUNTER
#endif
/*----------------------------------------------------------------*/
/* MIPS ZBus */
#if HAVE_MIPS_ZBUS_TIMER
#if defined(__mips__) && !defined(HAVE_TICK_COUNTER)
#include <sys/mman.h>
#include <unistd.h>
#include <fcntl.h>
typedef uint64_t ticks;
/*
 * Read the 64-bit counter located at physical address 0x10030000.
 * On the first call one page of /dev/mem is mapped read-only and the
 * mapping is cached in a function-local static for later calls.
 *
 * If the device cannot be opened or mapped, a diagnostic is printed with
 * perror() and 0 is returned.  The cached pointer is left null in that
 * case, so the failed mapping is never dereferenced and a later call
 * simply tries again.
 */
static inline ticks getticks(void)
{
     static uint64_t* addr = 0;
     if (addr == 0)
     {
          uint32_t rq_addr = 0x10030000;
          int fd;
          int pgsize;
          void *map;
          pgsize = getpagesize();
          fd = open ("/dev/mem", O_RDONLY | O_SYNC, 0);
          if (fd < 0) {
               perror("open");
               return 0;
          }
          map = mmap(0, pgsize, PROT_READ, MAP_SHARED, fd, rq_addr);
          /* the mapping, if any, stays valid after the descriptor is closed */
          close(fd);
          if (map == MAP_FAILED) {
               perror("mmap");
               return 0;
          }
          /* publish the pointer only once the mapping is known to be good */
          addr = (uint64_t *)map;
     }
     return *addr;
}
INLINE_ELAPSED(inline)
#define HAVE_TICK_COUNTER
#endif
#endif /* HAVE_MIPS_ZBUS_TIMER */
/* ARMv7-A counter read with a 64-bit coprocessor transfer (mrrc).
   Skipped if an earlier section already provided a tick counter, so that
   ticks/getticks/elapsed are never defined twice. */
#if defined(HAVE_ARMV7A_CNTVCT) && !defined(HAVE_TICK_COUNTER)
typedef uint64_t ticks;
static inline ticks getticks(void)
{
     uint32_t Rt, Rt2 = 0;
     /* Rt receives the low word, Rt2 the high word */
     asm volatile("mrrc p15, 1, %0, %1, c14" : "=r"(Rt), "=r"(Rt2));
     return ((uint64_t)Rt) | (((uint64_t)Rt2) << 32);
}
INLINE_ELAPSED(inline)
#define HAVE_TICK_COUNTER
#endif
/* ARMv7-A counter read with a 32-bit coprocessor transfer (mrc); the
   value is widened to the 64-bit ticks type.
   Skipped if an earlier section already provided a tick counter, so that
   ticks/getticks/elapsed are never defined twice. */
#if defined(HAVE_ARMV7A_PMCCNTR) && !defined(HAVE_TICK_COUNTER)
typedef uint64_t ticks;
static inline ticks getticks(void)
{
     uint32_t r;
     asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(r) );
     return r;
}
INLINE_ELAPSED(inline)
#define HAVE_TICK_COUNTER
#endif
/* AArch64: read the CNTVCT_EL0 system register.  Not used when the
   PMCCNTR_EL0 counter is selected, and skipped if an earlier section
   already provided a tick counter, so that ticks/getticks/elapsed are
   never defined twice. */
#if defined(__aarch64__) && defined(HAVE_ARMV8_CNTVCT_EL0) && !defined(HAVE_ARMV8_PMCCNTR_EL0) && !defined(HAVE_TICK_COUNTER)
typedef uint64_t ticks;
static inline ticks getticks(void)
{
     uint64_t Rt;
     asm volatile("mrs %0, CNTVCT_EL0" : "=r" (Rt));
     return Rt;
}
INLINE_ELAPSED(inline)
#define HAVE_TICK_COUNTER
#endif
/* AArch64: read the PMCCNTR_EL0 system register.
   Skipped if an earlier section already provided a tick counter, so that
   ticks/getticks/elapsed are never defined twice. */
#if defined(__aarch64__) && defined(HAVE_ARMV8_PMCCNTR_EL0) && !defined(HAVE_TICK_COUNTER)
typedef uint64_t ticks;
static inline ticks getticks(void)
{
     uint64_t cc = 0;
     asm volatile("mrs %0, PMCCNTR_EL0" : "=r"(cc));
     return cc;
}
INLINE_ELAPSED(inline)
#define HAVE_TICK_COUNTER
#endif

View File

@@ -0,0 +1,53 @@
/*
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include "kernel/ifftw.h"
#ifdef FFTW_DEBUG
#include <stdio.h>
/* A printer whose output goes to a stdio stream.  The base printer is the
   first member, so a P_file* and a pointer to its `super' refer to the
   same address. */
typedef struct {
printer super;
FILE *f; /* destination stream */
} P_file;
/* putchr callback: write one character to the stream stored in the P_file */
static void putchr_file(printer *p_, char c)
{
     FILE *out = ((P_file *) p_)->f;
     fputc(c, out);
}
/* Create a printer of P_file size that writes to f through putchr_file,
   and return it as a pointer to its base printer. */
static printer *mkprinter_file(FILE *f)
{
     P_file *fp;

     fp = (P_file *) X(mkprinter)(sizeof(P_file), putchr_file, 0);
     fp->f = f;
     return &fp->super;
}
/* Debug output: format the arguments with the printer's vprint method
   onto stderr.  A temporary printer is created and destroyed per call. */
void X(debug)(const char *format, ...)
{
     printer *out = mkprinter_file(stderr);
     va_list args;

     va_start(args, format);
     out->vprint(out, format, args);
     va_end(args);

     X(printer_destroy)(out);
}
#endif

View File

@@ -0,0 +1,36 @@
/*
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include "kernel/ifftw.h"
/* Decompose a pointer to interleaved complex data into pointers to the
   real and imaginary parts.  The two are swapped when sign differs from
   FFT_SIGN, i.e. when it does not match FFTW's idea of what the sign
   should be. */
void X(extract_reim)(int sign, R *c, R **r, R **i)
{
     int native = (sign == FFT_SIGN);

     *r = c + (native ? 0 : 1);
     *i = c + (native ? 1 : 0);
}

31
fftw-3.3.10/kernel/hash.c Normal file
View File

@@ -0,0 +1,31 @@
/*
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include "kernel/ifftw.h"
/* Multiplicative string hash: starting from 0xDEADBEEF, each byte of s is
   folded in as h = h * 17 + byte.  The terminating NUL is folded in as
   well before the loop stops. */
unsigned X(hash)(const char *s)
{
     unsigned h = 0xDEADBEEFu;

     for (;;) {
          unsigned byte = (unsigned)(*s & 0xFF);
          h = h * 17 + byte;
          /* test the character just hashed, then step past it */
          if (!*s++)
               break;
     }
     return h;
}

27
fftw-3.3.10/kernel/iabs.c Normal file
View File

@@ -0,0 +1,27 @@
/*
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include "kernel/ifftw.h"
/* absolute value of a */
INT X(iabs)(INT a)
{
     if (a < 0)
          return 0 - a;
     return a;
}

1143
fftw-3.3.10/kernel/ifftw.h Normal file

File diff suppressed because it is too large Load Diff

144
fftw-3.3.10/kernel/kalloc.c Normal file
View File

@@ -0,0 +1,144 @@
/*
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include "kernel/ifftw.h"
#if defined(HAVE_MALLOC_H)
# include <malloc.h>
#endif
/* ``kernel'' malloc(), with proper memory alignment */
#if defined(HAVE_DECL_MEMALIGN) && !HAVE_DECL_MEMALIGN
extern void *memalign(size_t, size_t);
#endif
#if defined(HAVE_DECL_POSIX_MEMALIGN) && !HAVE_DECL_POSIX_MEMALIGN
extern int posix_memalign(void **, size_t, size_t);
#endif
#if defined(macintosh) /* MacOS 9 */
# include <Multiprocessing.h>
#endif
#define real_free free /* memalign and malloc use ordinary free */
#define IS_POWER_OF_TWO(n) (((n) > 0) && (((n) & ((n) - 1)) == 0))
#if defined(WITH_OUR_MALLOC) && (MIN_ALIGNMENT >= 8) && IS_POWER_OF_TWO(MIN_ALIGNMENT)
/* Our own MIN_ALIGNMENT-aligned malloc/free. Assumes sizeof(void*) is a
power of two <= 8 and that malloc is at least sizeof(void*)-aligned.
The main reason for this routine is that, as of this writing,
Windows does not include any aligned allocation routines in its
system libraries, and instead provides an implementation with a
Visual C++ "Processor Pack" that you have to statically link into
your program. We do not want to require users to have VC++
(e.g. gcc/MinGW should be fine). Our code should be at least as good
as the MS _aligned_malloc, in any case, according to second-hand
reports of the algorithm it employs (also based on plain malloc). */
/* Allocate n bytes aligned to MIN_ALIGNMENT using plain malloc().
   MIN_ALIGNMENT extra bytes are requested; the returned pointer is the
   next MIN_ALIGNMENT boundary strictly above the malloc'ed address, and
   the original malloc pointer is stashed in the slot just below it so
   that our_free() can recover it.  Returns a null pointer on failure. */
static void *our_malloc(size_t n)
{
     void *p0, *p;

     /* n + MIN_ALIGNMENT must not wrap around size_t: otherwise malloc
        would hand back a block far smaller than the caller asked for */
     if (n > (size_t) -1 - MIN_ALIGNMENT) return (void *) 0;

     if (!(p0 = malloc(n + MIN_ALIGNMENT))) return (void *) 0;

     /* round up to the next multiple of MIN_ALIGNMENT (always moves
        forward by at least one byte, leaving room for the back-pointer) */
     p = (void *) (((uintptr_t) p0 + MIN_ALIGNMENT) & (~((uintptr_t) (MIN_ALIGNMENT - 1))));
     *((void **) p - 1) = p0;
     return p;
}
/* Release a block obtained from our_malloc().  The pointer that malloc()
   originally returned is read back from the slot just below p.  A null p
   is ignored (it has no such slot to read). */
static void our_free(void *p)
{
     void **slot;

     if (!p)
          return;
     slot = (void **) p - 1;
     free(*slot);
}
#endif
/* Allocate n bytes with the alignment FFTW requires.
   When MIN_ALIGNMENT is defined, exactly one of the branches below is
   compiled in, chosen by configuration/platform macros; otherwise plain
   malloc() is used.  Branches whose allocator needs a special
   deallocator re-#define real_free inside this function body, so that
   X(kernel_free), which follows in the file, expands to the matching
   free routine.  Returns whatever the selected allocator returns. */
void *X(kernel_malloc)(size_t n)
{
void *p;
#if defined(MIN_ALIGNMENT)
# if defined(WITH_OUR_MALLOC)
/* portable fallback built on plain malloc; paired with our_free */
p = our_malloc(n);
# undef real_free
# define real_free our_free
# elif defined(__FreeBSD__) && (MIN_ALIGNMENT <= 16)
/* FreeBSD does not have memalign, but its malloc is 16-byte aligned. */
p = malloc(n);
# elif (defined(__MACOSX__) || defined(__APPLE__)) && (MIN_ALIGNMENT <= 16)
/* MacOS X malloc is already 16-byte aligned */
p = malloc(n);
# elif defined(HAVE_MEMALIGN)
p = memalign(MIN_ALIGNMENT, n);
# elif defined(HAVE_POSIX_MEMALIGN)
/* note: posix_memalign is broken in glibc 2.2.5: it constrains
the size, not the alignment, to be (power of two) * sizeof(void*).
The bug seems to have been fixed as of glibc 2.3.1. */
/* a nonzero return means failure: normalize that to a null pointer */
if (posix_memalign(&p, MIN_ALIGNMENT, n))
p = (void*) 0;
# elif defined(__ICC) || defined(__INTEL_COMPILER) || defined(HAVE__MM_MALLOC)
/* Intel's C compiler defines _mm_malloc and _mm_free intrinsics */
p = (void *) _mm_malloc(n, MIN_ALIGNMENT);
# undef real_free
# define real_free _mm_free
# elif defined(_MSC_VER)
/* MS Visual C++ 6.0 with a "Processor Pack" supports SIMD
and _aligned_malloc/free (uses malloc.h) */
p = (void *) _aligned_malloc(n, MIN_ALIGNMENT);
# undef real_free
# define real_free _aligned_free
# elif defined(macintosh) /* MacOS 9 */
/* the alignment is passed as a symbolic constant, so only the
alignments enumerated here are supported */
p = (void *) MPAllocateAligned(n,
# if MIN_ALIGNMENT == 8
kMPAllocate8ByteAligned,
# elif MIN_ALIGNMENT == 16
kMPAllocate16ByteAligned,
# elif MIN_ALIGNMENT == 32
kMPAllocate32ByteAligned,
# else
# error "Unknown alignment for MPAllocateAligned"
# endif
0);
# undef real_free
# define real_free MPFree
# else
/* Add your machine here and send a patch to fftw@fftw.org
or (e.g. for Windows) configure --with-our-malloc */
# error "Don't know how to malloc() aligned memory ... try configuring --with-our-malloc"
# endif
#else /* !defined(MIN_ALIGNMENT) */
/* no alignment requirement configured: ordinary malloc suffices */
p = malloc(n);
#endif
return p;
}
/* Release memory obtained from X(kernel_malloc).  real_free is a macro:
   it starts out as plain free and may have been re-#defined inside
   X(kernel_malloc) to the deallocator matching the allocator selected
   there. */
void X(kernel_free)(void *p)
{
real_free(p);
}

View File

@@ -0,0 +1,54 @@
/*
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include "kernel/ifftw.h"
/* Feed len raw bytes starting at d_ into the md5 state p, one byte at a
   time and in increasing address order. */
void X(md5putb)(md5 *p, const void *d_, size_t len)
{
     const unsigned char *cur = (const unsigned char *)d_;

     while (len--)
          X(md5putc)(p, *cur++);
}
/* Feed the NUL-terminated string s into the md5 state p.  The
   terminating '\0' is hashed as well. */
void X(md5puts)(md5 *p, const char *s)
{
     for (;;) {
          const char ch = *s++;

          X(md5putc)(p, (unsigned)(ch & 0xFF));
          if (!ch)
               break; /* the terminator has just been hashed */
     }
}
/* Hash the in-memory bytes of an int (all sizeof(int) of them). */
void X(md5int)(md5 *p, int i)
{
     const void *bytes = &i;

     X(md5putb)(p, bytes, sizeof i);
}
/* Hash the in-memory bytes of an INT (all sizeof(INT) of them). */
void X(md5INT)(md5 *p, INT i)
{
     const void *bytes = &i;

     X(md5putb)(p, bytes, sizeof i);
}
/* Hash the in-memory bytes of an unsigned (all sizeof(unsigned) of them). */
void X(md5unsigned)(md5 *p, unsigned i)
{
     const void *bytes = &i;

     X(md5putb)(p, bytes, sizeof i);
}

142
fftw-3.3.10/kernel/md5.c Normal file
View File

@@ -0,0 +1,142 @@
/*
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
/*
independent implementation of Ron Rivest's MD5 message-digest
algorithm, based on rfc 1321.
Optimized for small code size, not speed. Works as long as
sizeof(md5uint) >= 4.
*/
#include "kernel/ifftw.h"
/* sintab[i] = 4294967296.0 * abs(sin((double)(i + 1))) */
/* Per-step additive constants: doblock() adds sintab[i] to the
   accumulator in step i of its 64-step loop. */
static const md5uint sintab[64] = {
0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee,
0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be,
0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,
0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa,
0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed,
0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,
0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c,
0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05,
0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039,
0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,
0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391
};
/* see rfc 1321 section 3.4 */
/* Per-step parameters for doblock(): in step i, k selects which of the
   16 decoded message words x[k] is added, and s is the left-rotation
   amount passed to rol().  Each group of 16 entries corresponds to one
   value of (i >> 4), i.e. one of the four mixing functions. */
static const struct roundtab {
char k;
char s;
} roundtab[64] = {
{ 0, 7}, { 1, 12}, { 2, 17}, { 3, 22},
{ 4, 7}, { 5, 12}, { 6, 17}, { 7, 22},
{ 8, 7}, { 9, 12}, { 10, 17}, { 11, 22},
{ 12, 7}, { 13, 12}, { 14, 17}, { 15, 22},
{ 1, 5}, { 6, 9}, { 11, 14}, { 0, 20},
{ 5, 5}, { 10, 9}, { 15, 14}, { 4, 20},
{ 9, 5}, { 14, 9}, { 3, 14}, { 8, 20},
{ 13, 5}, { 2, 9}, { 7, 14}, { 12, 20},
{ 5, 4}, { 8, 11}, { 11, 16}, { 14, 23},
{ 1, 4}, { 4, 11}, { 7, 16}, { 10, 23},
{ 13, 4}, { 0, 11}, { 3, 16}, { 6, 23},
{ 9, 4}, { 12, 11}, { 15, 16}, { 2, 23},
{ 0, 6}, { 7, 10}, { 14, 15}, { 5, 21},
{ 12, 6}, { 3, 10}, { 10, 15}, { 1, 21},
{ 8, 6}, { 15, 10}, { 6, 15}, { 13, 21},
{ 4, 6}, { 11, 10}, { 2, 15}, { 9, 21}
};
/* Rotate the low 32 bits of a left by s bits (0 < s < 32).  Bits above
   bit 31 of the result are not cleared; callers mask as needed.  Both
   arguments are fully parenthesized so that compound expressions can be
   passed safely; note that a is still evaluated twice. */
#define rol(a, s) (((a) << (int)(s)) | ((a) >> (32 - (int)(s))))
/* Fold one 64-byte message block into the running MD5 state.
     state: the four chaining words, updated in place (kept to 32 bits).
     data:  exactly 64 input bytes, interpreted as 16 little-endian
            32-bit words. */
static void doblock(md5sig state, const unsigned char *data)
{
     md5uint a, b, c, d, t, x[16];
     const md5uint msk = (md5uint)0xffffffffUL;
     int i;

     /* encode input bytes into md5uint.  The bytes are widened to
        md5uint (at least 4 bytes, per this file's requirement) before
        shifting: going through plain `unsigned' would be undefined for
        the 16- and 24-bit shifts wherever unsigned is only 16 bits. */
     for (i = 0; i < 16; ++i) {
          const unsigned char *p = data + 4 * i;
          x[i] = (md5uint)p[0] | ((md5uint)p[1] << 8)
               | ((md5uint)p[2] << 16) | ((md5uint)p[3] << 24);
     }

     a = state[0]; b = state[1]; c = state[2]; d = state[3];

     for (i = 0; i < 64; ++i) {
          const struct roundtab *p = roundtab + i;

          /* one mixing function per group of 16 steps */
          switch (i >> 4) {
              case 0: a += (b & c) | (~b & d); break;
              case 1: a += (b & d) | (c & ~d); break;
              case 2: a += b ^ c ^ d; break;
              case 3: a += c ^ (b | ~d); break;
          }
          a += sintab[i];
          a += x[(int)(p->k)];
          a &= msk; /* reduce to 32 bits before rotating */
          t = b + rol(a, p->s);
          /* rotate the roles of the four working words */
          a = d; d = c; c = b; b = t;
     }

     state[0] = (state[0] + a) & msk;
     state[1] = (state[1] + b) & msk;
     state[2] = (state[2] + c) & msk;
     state[3] = (state[3] + d) & msk;
}
/* Reset p to the start of a new message: load the four initial state
   words and zero the byte counter. */
void X(md5begin)(md5 *p)
{
     static const md5uint initial[4] = {
          0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476
     };
     int k;

     for (k = 0; k < 4; ++k)
          p->s[k] = initial[k];
     p->l = 0;
}
/* Append one byte to the message.  The byte is stored in the 64-byte
   buffer p->c at position (length mod 64); whenever the length becomes a
   multiple of 64 the full buffer is folded into the state by doblock(). */
void X(md5putc)(md5 *p, unsigned char c)
{
     p->c[p->l % 64] = c;
     ++p->l;
     if ((p->l % 64) == 0)
          doblock(p->s, p->c);
}
/* Finish the message: append the rfc 1321 padding and length so that the
   final block is processed.  On return the byte count is a multiple of
   64 and the signature is in p->s. */
void X(md5end)(md5 *p)
{
     unsigned nbits, k;

     /* message length in bits, captured before any padding is added */
     nbits = 8 * p->l;

     /* rfc 1321 section 3.1: a single 0x80 byte, then zeros until the
        length is 56 mod 64 */
     X(md5putc)(p, 0x80);
     while ((p->l % 64) != 56)
          X(md5putc)(p, 0x00);

     /* rfc 1321 section 3.2: eight length bytes, least significant first */
     for (k = 0; k < 8; ++k, nbits = nbits >> 8)
          X(md5putc)(p, (unsigned char)(nbits & 0xFF));
}

View File

@@ -0,0 +1,32 @@
/*
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include "kernel/ifftw.h"
/* Larger of two INTs. */
INT X(imax)(INT a, INT b)
{
     if (a > b)
          return a;
     return b;
}
/* Smaller of two INTs. */
INT X(imin)(INT a, INT b)
{
     if (a < b)
          return a;
     return b;
}

62
fftw-3.3.10/kernel/ops.c Normal file
View File

@@ -0,0 +1,62 @@
/*
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include "kernel/ifftw.h"
/* Set the add, mul, fma and other counters of *dst to zero. */
void X(ops_zero)(opcnt *dst)
{
     dst->other = 0;
     dst->fma = 0;
     dst->mul = 0;
     dst->add = 0;
}
/* Copy the whole operation count *src into *dst. */
void X(ops_cpy)(const opcnt *src, opcnt *dst)
{
     dst[0] = src[0];
}
/* Make *dst a count consisting solely of o "other" operations: every
   counter is cleared first, then `other' is set to o. */
void X(ops_other)(INT o, opcnt *dst)
{
     X(ops_zero)(dst);
     dst->other = o;
}
/* dst = m * a + b, counter by counter.  Each field of dst is computed
   only from the same field of a and b, so dst may be the same object as
   a or b (X(ops_madd2) relies on this). */
void X(ops_madd)(INT m, const opcnt *a, const opcnt *b, opcnt *dst)
{
     dst->add   = m * a->add   + b->add;
     dst->mul   = m * a->mul   + b->mul;
     dst->fma   = m * a->fma   + b->fma;
     dst->other = m * a->other + b->other;
}
/* dst = a + b, expressed as X(ops_madd) with a multiplier of one. */
void X(ops_add)(const opcnt *a, const opcnt *b, opcnt *dst)
{
     const INT once = 1;

     X(ops_madd)(once, a, b, dst);
}
/* dst += a: accumulate a into dst in place. */
void X(ops_add2)(const opcnt *a, opcnt *dst)
{
     opcnt *const acc = dst; /* both input and output of the sum */

     X(ops_add)(a, acc, acc);
}
/* dst += m * a: accumulate m copies of a into dst in place. */
void X(ops_madd2)(INT m, const opcnt *a, opcnt *dst)
{
     opcnt *const acc = dst; /* both input and output of the sum */

     X(ops_madd)(m, a, acc, acc);
}

View File

@@ -0,0 +1,82 @@
/*
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include "kernel/ifftw.h"
/* Given a solver which_dim, a vector sz, and whether or not the
   transform is out-of-place, return the actual dimension index that
   it corresponds to.  The basic idea here is that we return the
   which_dim'th valid dimension, starting from the end if
   which_dim < 0.

   A dimension is valid if the transform is out-of-place (oop != 0) or
   if its input and output strides are equal.

     which_dim > 0:  the which_dim'th valid dimension counting from the
                     front (1-based);
     which_dim < 0:  the (-which_dim)'th valid dimension counting from
                     the back;
     which_dim == 0: the middle dimension, index (rnk - 1) / 2, provided
                     the tensor has at least one dimension and that
                     dimension is valid.

   On success stores the index in *dp and returns 1; otherwise returns 0
   and leaves *dp untouched. */
static int really_pickdim(int which_dim, const tensor *sz, int oop, int *dp)
{
     int i;
     int count_ok = 0;

     if (which_dim > 0) {
          for (i = 0; i < sz->rnk; ++i) {
               if (oop || sz->dims[i].is == sz->dims[i].os)
                    if (++count_ok == which_dim) {
                         *dp = i;
                         return 1;
                    }
          }
     }
     else if (which_dim < 0) {
          for (i = sz->rnk - 1; i >= 0; --i) {
               if (oop || sz->dims[i].is == sz->dims[i].os)
                    if (++count_ok == -which_dim) {
                         *dp = i;
                         return 1;
                    }
          }
     }
     else { /* zero: pick the middle, if valid */
          /* Test the rank directly: with truncating integer division
             (0 - 1) / 2 is 0, not -1, so checking the computed index
             for >= 0 would not reject a rank-0 tensor and dims[0]
             would be read although it does not exist. */
          if (sz->rnk >= 1) {
               i = (sz->rnk - 1) / 2;
               if (oop || sz->dims[i].is == sz->dims[i].os) {
                    *dp = i;
                    return 1;
               }
          }
     }
     return 0;
}
/* Like really_pickdim, but only returns 1 if no previous "buddy"
which_dim in the buddies list would give the same dim. */
int X(pickdim)(int which_dim, const int *buddies, size_t nbuddies,
const tensor *sz, int oop, int *dp)
{
size_t i;
int d1;
if (!really_pickdim(which_dim, sz, oop, dp))
return 0;
/* check whether some buddy solver would produce the same dim.
If so, consider this solver unapplicable and let the buddy
take care of it. The smallest-indexed buddy is applicable. */
for (i = 0; i < nbuddies; ++i) {
if (buddies[i] == which_dim)
break; /* found self */
if (really_pickdim(buddies[i], sz, oop, &d1) && *dp == d1)
return 0; /* found equivalent buddy */
}
return 1;
}

70
fftw-3.3.10/kernel/plan.c Normal file
View File

@@ -0,0 +1,70 @@
/*
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include "kernel/ifftw.h"
/* "Plan: To bother about the best method of accomplishing an
accidental result." (Ambrose Bierce, The Enlarged Devil's
Dictionary). */
plan *X(mkplan)(size_t size, const plan_adt *adt)
{
plan *p = (plan *)MALLOC(size, PLANS);
A(adt->destroy);
p->adt = adt;
X(ops_zero)(&p->ops);
p->pcost = 0.0;
p->wakefulness = SLEEPY;
p->could_prune_now_p = 0;
return p;
}
/*
* destroy a plan
*/
/* Destroy a plan: run its adt->destroy hook, then free the plan itself.
   A null ego is ignored.  The plan is asserted to be SLEEPY. */
void X(plan_destroy_internal)(plan *ego)
{
     if (!ego)
          return;

     A(ego->wakefulness == SLEEPY);
     ego->adt->destroy(ego);
     X(ifree)(ego);
}
/* Destroy hook for plans that own no local state: it intentionally does
   nothing with its argument. */
void X(plan_null_destroy)(plan *ego)
{
     /* nothing to release */
     UNUSED(ego);
}
/* Change the wakefulness of a plan: call its adt->awake hook with the
   requested state and then record that state in the plan.  A null ego is
   ignored.  Asserts that exactly one of the old and new states is
   SLEEPY, i.e. the call is a sleep->awake or awake->sleep transition. */
void X(plan_awake)(plan *ego, enum wakefulness wakefulness)
{
     if (!ego)
          return;

     A((wakefulness == SLEEPY) != (ego->wakefulness == SLEEPY));
     ego->adt->awake(ego, wakefulness);
     ego->wakefulness = wakefulness;
}

1035
fftw-3.3.10/kernel/planner.c Normal file

File diff suppressed because it is too large Load Diff

212
fftw-3.3.10/kernel/primes.c Normal file
View File

@@ -0,0 +1,212 @@
/*
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include "kernel/ifftw.h"
/***************************************************************************/
/* Rader's algorithm requires lots of modular arithmetic, and if we
aren't careful we can have errors due to integer overflows. */
/* Compute (x * y) mod p, but watch out for integer overflows; we must
have 0 <= {x, y} < p.
If overflow is common, this routine is somewhat slower than
e.g. using 'long long' arithmetic. However, it has the advantage
of working when INT is 64 bits, and is also faster when overflow is
rare. FFTW calls this via the MULMOD macro, which further
optimizes for the case of small integers.
*/
/* (x + y) mod p for 0 <= x, y < p, computed without ever forming a
   value >= p, so it cannot overflow.  The whole expansion is
   parenthesized so that the macro can be used inside a larger
   expression.  Arguments are evaluated more than once. */
#define ADD_MOD(x, y, p) (((x) >= (p) - (y)) ? ((x) + ((y) - (p))) : ((x) + (y)))
/* (x * y) mod p by binary (shift-and-add) multiplication, using only
   modular additions so that no intermediate exceeds p.  The operands
   are swapped if necessary so that the loop runs over the bits of the
   smaller one. */
INT X(safe_mulmod)(INT x, INT y, INT p)
{
     INT acc;

     if (y > x)
          return X(safe_mulmod)(y, x, p);

     A(0 <= y && x < p);

     for (acc = 0; y != 0; y >>= 1) {
          /* add the current multiple of x iff the low bit of y is set */
          INT term = (y & 1) ? x : 0;

          acc = ADD_MOD(acc, term, p);
          x = ADD_MOD(x, x, p); /* x <- 2x mod p for the next bit */
     }
     return acc;
}
/***************************************************************************/
/* Compute n^m mod p, where m >= 0 and p > 0, by recursive
   square-and-multiply.  Returns 1 for m == 0.

   MULMOD is a macro (see the comment on X(safe_mulmod) in this file),
   so each recursive result is stored in a local before being handed to
   it: should the macro expand an argument more than once, passing the
   call itself would repeat the whole recursion. */
INT X(power_mod)(INT n, INT m, INT p)
{
     A(p > 0);
     if (m == 0)
          return 1;
     else if (m % 2 == 0) {
          INT x = X(power_mod)(n, m / 2, p);
          return MULMOD(x, x, p);
     }
     else {
          INT y = X(power_mod)(n, m - 1, p);
          return MULMOD(n, y, p);
     }
}
/* the following two routines were contributed by Greg Dionne. */
/* Store the distinct prime factors of n in primef[], in increasing
   order, and return how many were stored.  n must be even (asserted).
   The caller must supply an array large enough for every distinct
   prime factor of n; no bound is checked here. */
static INT get_prime_factors(INT n, INT *primef)
{
     INT i;
     INT size = 0;

     A(n % 2 == 0); /* this routine is designed only for even n */
     primef[size++] = (INT)2;
     do {
          n >>= 1;
     } while ((n & 1) == 0);

     if (n == 1)
          return size;

     /* trial division by odd i while i*i <= n; written as i <= n / i so
        that the square is never formed and cannot overflow INT */
     for (i = 3; i <= n / i; i += 2)
          if (!(n % i)) {
               primef[size++] = i;
               do {
                    n /= i;
               } while (!(n % i));
          }

     if (n == 1)
          return size;

     /* whatever is left has no divisor <= its square root: it is prime */
     primef[size++] = n;
     return size;
}
/* Search for the smallest n >= 2 such that n^((p-1)/q) mod p != 1 for
   every prime factor q of p - 1, and return it.  p == 2 is special-cased
   to return 1.  For other p, p - 1 must be even (get_prime_factors
   asserts this). */
INT X(find_generator)(INT p)
{
     INT primef[16]; /* smallest number = 32589158477190044730 > 2^64 */
     INT order = p - 1;
     INT nfactors, candidate, k;

     if (p == 2)
          return 1;

     nfactors = get_prime_factors(order, primef);

     candidate = 2;
     k = 0;
     while (k < nfactors) {
          if (X(power_mod)(candidate, order / primef[k], p) == 1) {
               /* candidate fails for this factor: try the next
                  candidate, restarting from the first factor */
               ++candidate;
               k = 0;
          } else {
               ++k;
          }
     }
     return candidate;
}
/* Return first prime divisor of n (It would be at best slightly faster to
   search a static table of primes; there are 6542 primes < 2^16.)

   For n <= 1 the argument itself is returned.  If n is prime, n is
   returned. */
INT X(first_divisor)(INT n)
{
     INT i;

     if (n <= 1)
          return n;
     if (n % 2 == 0)
          return 2;

     /* trial division by odd i while i*i <= n; written as i <= n / i so
        that the square is never formed and cannot overflow INT when n
        is close to the largest representable value */
     for (i = 3; i <= n / i; i += 2)
          if (n % i == 0)
               return i;
     return n;
}
/* Nonzero iff n is prime: n must exceed 1 and be its own first divisor. */
int X(is_prime)(INT n)
{
     if (n <= 1)
          return 0;
     return X(first_divisor)(n) == n;
}
/* Smallest prime >= n (n itself if it is already prime). */
INT X(next_prime)(INT n)
{
     for (;;) {
          if (X(is_prime)(n))
               return n;
          ++n;
     }
}
/* Nonzero iff n is a product of powers of the numbers in primes[], a
   list terminated by a 0 entry.  Every listed factor is divided out of n
   as often as possible; n factors completely iff 1 is left over.
   Returns 0 for n == 0. */
int X(factors_into)(INT n, const INT *primes)
{
     /* 0 is divisible by everything and dividing it leaves 0 again, so
        the reduction loop below would never terminate */
     if (n == 0)
          return 0;

     for (; *primes != 0; ++primes)
          while ((n % *primes) == 0)
               n /= *primes;
     return (n == 1);
}
/* integer square root.  Return floor(sqrt(N)).
   Requires n >= 0 (asserted).  Iterates on a pair (hi, lo) with
   lo = n / hi, replacing hi by their average until hi <= lo. */
INT X(isqrt)(INT n)
{
     INT hi, lo;

     A(n >= 0);
     if (n == 0)
          return 0;

     hi = n;
     lo = 1;
     for (;;) {
          hi = (hi + lo) / 2;
          lo = n / hi;
          if (hi <= lo)
               break;
     }
     return hi;
}
/* Exact integer square root: returns sqrt(n) if n is a perfect square,
   and 0 otherwise. */
static INT isqrt_maybe(INT n)
{
     INT root = X(isqrt)(n);

     if (root * root != n)
          return 0;
     return root;
}
/* true iff a divides b */
#define divides(a, b) (((b) % (a)) == 0)

/* Choose a radix for a size-n problem according to the request r:
     r > 0:  r itself if it divides n, else 0;
     r == 0: the first divisor of n;
     r < 0:  if n = (-r) * q^2 with n > -r, the radix q, else 0.
   A return value of 0 means no radix satisfies the request. */
INT X(choose_radix)(INT r, INT n)
{
     if (r == 0)
          return X(first_divisor)(n);

     if (r > 0)
          return divides(r, n) ? r : 0;

     /* r is negative.  If n = (-r) * q^2, take q as the radix */
     r = 0 - r;
     if (n > r && divides(r, n))
          return isqrt_maybe(n / r);
     return 0;
}
/* return A mod N, works for all A including A < 0.
   Requires n > 0 (asserted).  For negative a the computation works on
   -(a + 1), which is non-negative, and mirrors the remainder back from
   n - 1. */
INT X(modulo)(INT a, INT n)
{
     A(n > 0);
     if (a < 0)
          return (n - 1) - ((-(a + (INT)1)) % n);
     return a % n;
}
/* TRUE if N factors into small primes, i.e. into powers of 2, 3 and 5
   only. */
int X(factors_into_small_primes)(INT n)
{
     /* list is terminated by a 0 entry, as X(factors_into) expects */
     static const INT small[] = { 2, 3, 5, 0 };

     return X(factors_into)(n, small);
}

244
fftw-3.3.10/kernel/print.c Normal file
View File

@@ -0,0 +1,244 @@
/*
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include "kernel/ifftw.h"
#include <stddef.h>
#include <stdarg.h>
#include <stdio.h>
#define BSZ 64
/* Emit every character of the NUL-terminated string s through
   p->putchr; the terminator itself is not emitted. */
static void myputs(printer *p, const char *s)
{
     for (; *s; ++s)
          p->putchr(p, *s);
}
/* Emit a line break followed by p->indent spaces. */
static void newline(printer *p)
{
     int col = 0;

     p->putchr(p, '\n');
     while (col < p->indent) {
          p->putchr(p, ' ');
          ++col;
     }
}
/* Digit characters indexed by digit value; covers every base up to 16. */
static const char *digits = "0123456789abcdef";
/* Print the signed integer i in decimal, with a leading '-' if it is
   negative.

   The value is never negated: digits of a negative number are taken
   from the (non-positive) remainders instead.  Negating would overflow
   for the most negative INT and would then index digits[] with a
   negative remainder.  This relies on integer division truncating
   toward zero, so that i % 10 lies in -9..0 for negative i. */
static void putint(printer *p, INT i)
{
     char buf[BSZ];
     char *f = buf;
     const int negative = (i < 0);

     if (negative)
          p->putchr(p, '-');

     /* generate digits least-significant first */
     do {
          int d = (int)(i % 10);
          *f++ = digits[negative ? -d : d];
          i /= 10;
     } while (i);

     /* and emit them in reverse */
     do {
          p->putchr(p, *--f);
     } while (f != buf);
}
/* Print the unsigned value i in the given base (digits come from the
   digits[] table), left-padded with '0' to at least `width' characters.
   At least one digit is always printed. */
static void putulong(printer *p, unsigned long i, unsigned base, int width)
{
     char buf[BSZ];
     char *top = buf;

     /* generate digits least-significant first */
     do {
          *top++ = digits[i % base];
          i /= base;
     } while (i);

     /* zero padding up to the requested width */
     for (; width > top - buf; --width)
          p->putchr(p, '0');

     /* emit the digits in reverse */
     while (top != buf)
          p->putchr(p, *--top);
}
/* printf-like formatter that writes through p->putchr.  Characters of
   `format' other than '%' are emitted verbatim; '%' introduces one of
   the conversions handled below, each consuming its argument (if any)
   from ap.  There is no "%%" escape, nor any flag/width syntax: the
   character after '%' must be one of the cases listed, anything else
   trips the assertion in the default branch.

   The three labels at the bottom of the inner switch (putnull, putival)
   are shared tails reached by goto from several conversions. */
static void vprint(printer *p, const char *format, va_list ap)
{
const char *s = format;
char c;
INT ival;
while ((c = *s++)) {
switch (c) {
case '%':
switch ((c = *s++)) {
case 'M': {
/* md5 value */
/* printed as exactly 8 hex digits of the low 32 bits */
md5uint x = va_arg(ap, md5uint);
putulong(p, (unsigned long)(0xffffffffUL & x),
16u, 8);
break;
}
case 'c': {
/* single character (passed as int, per default promotion) */
int x = va_arg(ap, int);
p->putchr(p, (char)x);
break;
}
case 's': {
/* string; a null pointer prints as "(null)" */
char *x = va_arg(ap, char *);
if (x)
myputs(p, x);
else
goto putnull;
break;
}
case 'd': {
/* int, printed in decimal */
int x = va_arg(ap, int);
ival = (INT)x;
goto putival;
}
case 'D': {
/* INT, printed in decimal */
ival = va_arg(ap, INT);
goto putival;
}
case 'v': {
/* print optional vector length */
/* prints "-x<ival>" only when ival > 1, otherwise nothing */
ival = va_arg(ap, INT);
if (ival > 1) {
myputs(p, "-x");
goto putival;
}
break;
}
case 'o': {
/* integer option. Usage: %oNAME= */
/* prints "/NAME=<ival>" when ival is nonzero and nothing
otherwise; either way the format is consumed through the
'='.  NOTE(review): the scan for '=' has no check for the
end of the format string, so the '=' must be present. */
ival = va_arg(ap, INT);
if (ival)
p->putchr(p, '/');
while ((c = *s++) != '=')
if (ival)
p->putchr(p, c);
if (ival) {
p->putchr(p, '=');
goto putival;
}
break;
}
case 'u': {
/* unsigned, decimal, no padding */
unsigned x = va_arg(ap, unsigned);
putulong(p, (unsigned long)x, 10u, 0);
break;
}
case 'x': {
/* unsigned, hexadecimal, no padding */
unsigned x = va_arg(ap, unsigned);
putulong(p, (unsigned long)x, 16u, 0);
break;
}
case '(': {
/* newline, augment indent level */
p->indent += p->indent_incr;
newline(p);
break;
}
case ')': {
/* decrement indent level */
p->indent -= p->indent_incr;
break;
}
case 'p': { /* note difference from C's %p */
/* print plan */
/* delegates to the plan's own print hook; null prints "(null)" */
plan *x = va_arg(ap, plan *);
if (x)
x->adt->print(x, p);
else
goto putnull;
break;
}
case 'P': {
/* print problem */
/* delegates to the problem's own print hook; null prints "(null)" */
problem *x = va_arg(ap, problem *);
if (x)
x->adt->print(x, p);
else
goto putnull;
break;
}
case 'T': {
/* print tensor */
tensor *x = va_arg(ap, tensor *);
if (x)
X(tensor_print)(x, p);
else
goto putnull;
break;
}
default:
A(0 /* unknown format */);
break;
/* shared tail: a null pointer argument */
putnull:
myputs(p, "(null)");
break;
/* shared tail: print the signed value collected in ival */
putival:
putint(p, ival);
break;
}
break;
default:
/* ordinary character: copy to the output */
p->putchr(p, c);
break;
}
}
}
/* Variadic front end to vprint(): collects the arguments following
   `format' and forwards them. */
static void print(printer *p, const char *format, ...)
{
     va_list args;

     va_start(args, format);
     vprint(p, format, args);
     va_end(args);
}
/* Allocate a printer of `size' bytes and initialize its common part:
   the formatting entry points are this file's print/vprint, characters
   are written with the supplied putchr, `cleanup' is stored for
   X(printer_destroy), and indentation starts at 0 with a step of 2. */
printer *X(mkprinter)(size_t size,
                      void (*putchr)(printer *p, char c),
                      void (*cleanup)(printer *p))
{
     printer *pr;

     pr = (printer *)MALLOC(size, OTHER);

     /* caller-supplied hooks */
     pr->putchr = putchr;
     pr->cleanup = cleanup;

     /* formatting entry points */
     pr->print = print;
     pr->vprint = vprint;

     /* indentation state */
     pr->indent = 0;
     pr->indent_incr = 2;

     return pr;
}
/* Destroy a printer: run its cleanup hook if one was supplied, then
   free the printer. */
void X(printer_destroy)(printer *p)
{
     void (*hook)(printer *) = p->cleanup;

     if (hook)
          hook(p);
     X(ifree)(p);
}

View File

@@ -0,0 +1,78 @@
/*
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include "kernel/ifftw.h"
/* constructor: allocate a problem of sz bytes and attach its
   operations table */
problem *X(mkproblem)(size_t sz, const problem_adt *adt)
{
     problem *prb;

     prb = (problem *)MALLOC(sz, PROBLEMS);
     prb->adt = adt;
     return prb;
}
/* destructor: hand the problem to its own destroy hook; a null ego is
   ignored */
void X(problem_destroy)(problem *ego)
{
     if (!ego)
          return;
     ego->adt->destroy(ego);
}
/* management of unsolvable problems */
/* destroy hook of the unsolvable problem: does nothing with its
   argument */
static void unsolvable_destroy(problem *ego)
{
     UNUSED(ego);
}
/* hash hook of the unsolvable problem: the problem contributes only the
   fixed tag string to the md5 state */
static void unsolvable_hash(const problem *p, md5 *m)
{
     UNUSED(p);
     X(md5puts)(m, "unsolvable");
}
/* print hook of the unsolvable problem: always prints the same fixed
   text */
static void unsolvable_print(const problem *ego, printer *p)
{
     UNUSED(ego);
     p->print(p, "(unsolvable)");
}
/* zero hook of the unsolvable problem: does nothing with its argument */
static void unsolvable_zero(const problem *ego)
{
     UNUSED(ego);
}
/* Operations table of the unsolvable problem: its kind tag followed by
   the unsolvable_* hooks, given positionally in the order hash, zero,
   print, destroy. */
static const problem_adt padt =
{
PROBLEM_UNSOLVABLE,
unsolvable_hash,
unsolvable_zero,
unsolvable_print,
unsolvable_destroy
};
/* there is no point in malloc'ing this one */
/* A single statically allocated instance, initialized with &padt, is
   what X(mkproblem_unsolvable) returns. */
static problem the_unsolvable_problem = { &padt };
/* Return the unsolvable problem.  Every call yields a pointer to the
   same static object; nothing is allocated. */
problem *X(mkproblem_unsolvable)(void)
{
     problem *const shared = &the_unsolvable_problem;

     return shared;
}

View File

@@ -0,0 +1,68 @@
/*
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include "kernel/ifftw.h"
/*
common routines for Rader solvers
*/
/* shared twiddle and omega lists, keyed by two/three integers.
   One node of a singly linked, reference-counted list. */
struct rader_tls {
     INT k1;           /* key, first integer */
     INT k2;           /* key, second integer */
     INT k3;           /* key, third integer */
     R *W;             /* the array stored under this key */
     int refcnt;       /* number of outstanding users of W */
     rader_tl *cdr;    /* next node, or null at the end of the list */
};
/* Push a new entry (k1, k2, k3) -> W onto the front of the list *tl,
   with an initial reference count of 1. */
void X(rader_tl_insert)(INT k1, INT k2, INT k3, R *W, rader_tl **tl)
{
     rader_tl *node = (rader_tl *) MALLOC(sizeof(rader_tl), TWIDDLES);

     node->k1 = k1;
     node->k2 = k2;
     node->k3 = k3;
     node->W = W;
     node->refcnt = 1;

     /* link in as the new head */
     node->cdr = *tl;
     *tl = node;
}
/* Look up the entry with key (k1, k2, k3) in the list starting at t.
   If found, its reference count is incremented and its array is
   returned; otherwise 0 is returned. */
R *X(rader_tl_find)(INT k1, INT k2, INT k3, rader_tl *t)
{
     for (; t; t = t->cdr) {
          if (t->k1 == k1 && t->k2 == k2 && t->k3 == k3) {
               ++t->refcnt;
               return t->W;
          }
     }
     return 0;
}
/* Drop one reference to the array W held in the list *tl.  The entry is
   located by comparing array pointers; when its reference count reaches
   zero (or below) it is unlinked and both the array and the node are
   freed.  A null W, or a W that is not in the list, is ignored. */
void X(rader_tl_delete)(R *W, rader_tl **tl)
{
     rader_tl **link, *node;

     if (!W)
          return;

     /* link always points at the pointer that refers to node */
     link = tl;
     while ((node = *link) != 0 && node->W != W)
          link = &node->cdr;

     if (!node)
          return;

     if (--node->refcnt <= 0) {
          *link = node->cdr;
          X(ifree)(node->W);
          X(ifree)(node);
     }
}

204
fftw-3.3.10/kernel/scan.c Normal file
View File

@@ -0,0 +1,204 @@
/*
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include "kernel/ifftw.h"
#include <string.h>
#include <stddef.h>
#include <stdarg.h>
#include <stdio.h>
/* Character classification.  With USE_CTYPE the standard <ctype.h>
   routines are used; otherwise the four macros below replace them with
   locale-independent range tests.  The macros evaluate their argument
   twice, so it must be free of side effects. */
#ifdef USE_CTYPE
#include <ctype.h>
#else
/* Screw ctype. On linux, the is* functions call a routine that gets
the ctype map in the current locale. Because this operation is
expensive, the map is cached on a per-thread basis. I am not
willing to link this crap with FFTW. Not over my dead body.
Sic transit gloria mundi.
*/
#undef isspace
/* every code from 0 through ' ' counts as blank; negative values
(such as EOF) do not */
#define isspace(x) ((x) >= 0 && (x) <= ' ')
#undef isdigit
#define isdigit(x) ((x) >= '0' && (x) <= '9')
#undef isupper
#define isupper(x) ((x) >= 'A' && (x) <= 'Z')
#undef islower
#define islower(x) ((x) >= 'a' && (x) <= 'z')
#endif
/* Read the next character: the pushed-back character if there is one
   (clearing the one-slot pushback buffer), else a fresh character from
   sc->getchr.  EOF in sc->ungotc means "nothing pushed back". */
static int mygetc(scanner *sc)
{
     int pending = sc->ungotc;

     if (pending == EOF)
          return(sc->getchr(sc));

     sc->ungotc = EOF;
     return pending;
}

#define GETCHR(sc) mygetc(sc)
/* Push c back so that the next mygetc() returns it.  There is a single
   slot: a second push before the next read overwrites the first. */
static void myungetc(scanner *sc, int c)
{
     sc->ungotc = c;
}

#define UNGETCHR(sc, c) myungetc(sc, c)
/* Skip over blanks: read until a character for which isspace() is false
   turns up, and push that character back. */
static void eat_blanks(scanner *sc)
{
     int ch;

     do {
          ch = GETCHR(sc);
     } while (isspace(ch));

     UNGETCHR(sc, ch);
}
/* Read a token into s: characters are copied until EOF, a blank, '(' or
   ')' is seen, or until maxlen characters have been stored.  The token
   is then NUL-terminated and the character that stopped the scan is
   pushed back.  Because the terminator is written after up to maxlen
   characters, s must have room for maxlen + 1 bytes.  maxlen must be
   positive (asserted). */
static void mygets(scanner *sc, char *s, int maxlen)
{
     char *const limit = s + maxlen;
     int ch;

     A(maxlen > 0);

     for (;;) {
          /* the character is read before the length check, exactly one
             character beyond the token is therefore always consumed
             (and pushed back below) */
          ch = GETCHR(sc);
          if (ch == EOF)
               break;
          if (isspace(ch))
               break;
          if (ch == ')' || ch == '(')
               break;
          if (!(s < limit))
               break;
          *s++ = (char)(ch & 0xFF);
     }

     *s = 0;
     UNGETCHR(sc, ch);
}
/* Read an integer in the given base (letters stand for the digit values
   10 and up, in either case), optionally preceded by '+' or '-'.

     *ret is set to 1 if at least one digit was read, else to 0;
     the character that ended the number is pushed back.

   A character only counts as a digit if its value is below `base', so
   e.g. a letter ends a base-10 number instead of being folded into it.

   The magnitude is accumulated in an unsigned long, where arithmetic
   wraps instead of overflowing; this matters e.g. for 8-hex-digit values
   on platforms whose long is 32 bits.  The final conversion back to long
   is implementation-defined for out-of-range values rather than
   undefined. */
static long getlong(scanner *sc, int base, int *ret)
{
     int sign = 1, ch, count;
     unsigned long ux = 0;

     ch = GETCHR(sc);
     if (ch == '-' || ch == '+') {
          sign = ch == '-' ? -1 : 1;
          ch = GETCHR(sc);
     }

     for (count = 0; ; ++count) {
          int d; /* digit value; ch itself is kept for the pushback */

          if (isdigit(ch))
               d = ch - '0';
          else if (isupper(ch))
               d = ch - 'A' + 10;
          else if (islower(ch))
               d = ch - 'a' + 10;
          else
               break;

          if (d >= base)
               break; /* not a digit of this base: the number ends here */

          ux = ux * (unsigned long)base + (unsigned long)d;
          ch = GETCHR(sc);
     }

     UNGETCHR(sc, ch);
     *ret = count > 0;
     return (long)(sign < 0 ? 0UL - ux : ux);
}
/* vscan is mostly scanf-like, with our additional format specifiers,
but with a few twists. It returns simply 0 or 1 indicating whether
the match was successful. '(' and ')' in the format string match
those characters preceded by any whitespace. Finally, if a
character match fails, it will ungetchr() the last character back
onto the stream. */
/* Match the input against `format', storing converted values through
   the pointers in ap.  Returns 1 if the whole format was matched and 0
   at the first mismatch or failed conversion.

   Conversions (introduced by '%'):
     %*s  token read by mygets(); takes TWO arguments, first an int
          giving the maximum length, then the char buffer.  A plain %s
          without the '*' leaves fmt_len at 0, which trips the
          maxlen > 0 assertion in mygets().
     %d   decimal integer, stored through an int pointer
     %x   hexadecimal integer, stored through an int pointer
     %M   hexadecimal md5 word (low 32 bits kept), stored through an
          md5uint pointer
   The variable ch doubles as the "digits were read" flag returned by
   getlong() for the numeric conversions. */
static int vscan(scanner *sc, const char *format, va_list ap)
{
const char *s = format;
char c;
int ch = 0;
int fmt_len;
while ((c = *s++)) {
/* the length set by '*' applies to a single conversion only */
fmt_len = 0;
switch (c) {
case '%':
/* re-entered via goto after '*' has consumed its length argument */
getformat:
switch ((c = *s++)) {
case 's': {
char *x = va_arg(ap, char *);
mygets(sc, x, fmt_len);
break;
}
case 'd': {
int *x = va_arg(ap, int *);
*x = (int) getlong(sc, 10, &ch);
if (!ch) return 0;
break;
}
case 'x': {
int *x = va_arg(ap, int *);
*x = (int) getlong(sc, 16, &ch);
if (!ch) return 0;
break;
}
case 'M': {
md5uint *x = va_arg(ap, md5uint *);
*x = (md5uint)
(0xFFFFFFFF & getlong(sc, 16, &ch));
if (!ch) return 0;
break;
}
case '*': {
/* a non-positive length makes the whole scan fail */
if ((fmt_len = va_arg(ap, int)) <= 0) return 0;
goto getformat;
}
default:
A(0 /* unknown format */);
break;
}
break;
default:
/* literal character.  Blanks in the format, and blanks before a
literal '(' or ')', match any amount of input whitespace
(including none); a blank in the format consumes nothing else.
Any other literal must match the next input character exactly;
on mismatch the offending character is pushed back. */
if (isspace(c) || c == '(' || c == ')')
eat_blanks(sc);
if (!isspace(c) && (ch = GETCHR(sc)) != c) {
UNGETCHR(sc, ch);
return 0;
}
break;
}
}
return 1;
}
/* Variadic front end to vscan(): collects the arguments following
   `format', forwards them, and returns vscan's result. */
static int scan(scanner *sc, const char *format, ...)
{
     va_list args;
     int matched;

     va_start(args, format);
     matched = vscan(sc, format, args);
     va_end(args);

     return matched;
}
/* Allocate a scanner of `size` bytes and install the scan/vscan entry
   points together with the caller-supplied character source `getchr`.
   The ungotc slot starts out as EOF. */
scanner *X(mkscanner)(size_t size, int (*getchr)(scanner *sc))
{
    scanner *self = (scanner *)MALLOC(size, OTHER);

    self->ungotc = EOF;
    self->getchr = getchr;
    self->vscan = vscan;
    self->scan = scan;
    return self;
}
/* Release a scanner by handing it to X(ifree). */
void X(scanner_destroy)(scanner *sc)
{
X(ifree)(sc);
}

View File

@@ -0,0 +1,50 @@
/*
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include "kernel/ifftw.h"
/* Allocate a solver of `size` bytes bound to the method table `adt`.
   The new solver starts with a reference count of zero. */
solver *X(mksolver)(size_t size, const solver_adt *adt)
{
    solver *slv = (solver *)MALLOC(size, SOLVERS);

    slv->refcnt = 0;
    slv->adt = adt;
    return slv;
}
/* Take a reference on the solver: increments its reference count. */
void X(solver_use)(solver *ego)
{
++ego->refcnt;
}
/* Drop one reference to the solver.  When the count reaches zero the
   solver's optional destroy hook is run and its memory is released. */
void X(solver_destroy)(solver *ego)
{
    --ego->refcnt;
    if (ego->refcnt != 0)
        return;

    if (ego->adt->destroy)
        ego->adt->destroy(ego);
    X(ifree)(ego);
}
/* Register solver s with the planner by forwarding to the planner's
   register_solver method. */
void X(solver_register)(planner *plnr, solver *s)
{
plnr->adt->register_solver(plnr, s);
}

View File

@@ -0,0 +1,33 @@
/*
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include "kernel/ifftw.h"
/* Run every registration function in the table.  The table ends at the
   first entry whose reg_nam is null.  While an entry runs, the planner's
   cur_reg_nam holds that entry's name and cur_reg_id has been reset to 0;
   cur_reg_nam is cleared again once the whole table has been processed. */
void X(solvtab_exec)(const solvtab tbl, planner *p)
{
    while (tbl->reg_nam) {
        p->cur_reg_nam = tbl->reg_nam;
        p->cur_reg_id = 0;
        tbl->reg(p);
        ++tbl;
    }
    p->cur_reg_nam = 0;
}

View File

@@ -0,0 +1,45 @@
/*
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include "kernel/ifftw.h"
/* A global INT constant whose value is 0.  NOTE(review): presumably
   referenced by stride macros declared in ifftw.h -- confirm there. */
const INT X(an_INT_guaranteed_to_be_zero) = 0;
#ifdef PRECOMPUTE_ARRAY_INDICES
/* Build a precomputed stride table: an array p of n entries with
   p[i] == s * i.  The table is released with X(stride_destroy). */
stride X(mkstride)(INT n, INT s)
{
    INT i; /* same type as n, so the index cannot overflow before reaching n */
    INT *p;

    A(n >= 0);
    p = (INT *) MALLOC((size_t)n * sizeof(INT), STRIDES);
    for (i = 0; i < n; ++i)
        p[i] = s * i;
    return p;
}
/* Release a stride table created by X(mkstride), via X(ifree0). */
void X(stride_destroy)(stride p)
{
X(ifree0)(p);
}
#endif

123
fftw-3.3.10/kernel/tensor.c Normal file
View File

@@ -0,0 +1,123 @@
/*
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include "kernel/ifftw.h"
/* Allocate a tensor of rank rnk (rnk >= 0 is asserted).  Only x->rnk
   (and, in the pointer layout, x->dims) is set here; the iodim entries
   themselves are left for the caller to fill in.  How the dims storage
   is obtained depends on the build configuration:
   - STRUCT_HACK_KR:  one allocation with room for rnk - 1 iodims beyond
     sizeof(tensor) when rnk is finite and > 1 (presumably the struct
     already embeds one iodim -- confirm in ifftw.h);
   - STRUCT_HACK_C99: one allocation with room for rnk iodims beyond
     sizeof(tensor) when rnk is finite;
   - otherwise:       dims is a separately allocated array, or 0 when
     rnk is 0 or not finite. */
tensor *X(mktensor)(int rnk)
{
tensor *x;
A(rnk >= 0);
#if defined(STRUCT_HACK_KR)
if (FINITE_RNK(rnk) && rnk > 1)
x = (tensor *)MALLOC(sizeof(tensor) + (unsigned)(rnk - 1) * sizeof(iodim),
TENSORS);
else
x = (tensor *)MALLOC(sizeof(tensor), TENSORS);
#elif defined(STRUCT_HACK_C99)
if (FINITE_RNK(rnk))
x = (tensor *)MALLOC(sizeof(tensor) + (unsigned)rnk * sizeof(iodim),
TENSORS);
else
x = (tensor *)MALLOC(sizeof(tensor), TENSORS);
#else
x = (tensor *)MALLOC(sizeof(tensor), TENSORS);
if (FINITE_RNK(rnk) && rnk > 0)
x->dims = (iodim *)MALLOC(sizeof(iodim) * (unsigned)rnk, TENSORS);
else
x->dims = 0;
#endif
x->rnk = rnk;
return x;
}
/* Release a tensor created by X(mktensor).  When neither struct-hack
   layout is in use the dims array is a separate allocation and is
   released first (X(ifree0) is used because dims may be 0). */
void X(tensor_destroy)(tensor *sz)
{
#if !defined(STRUCT_HACK_C99) && !defined(STRUCT_HACK_KR)
X(ifree0)(sz->dims);
#endif
X(ifree)(sz);
}
/* Total number of elements described by the tensor: the product of all
   dimension lengths (1 for rank 0).  Returns 0 for a tensor whose rank
   is not finite. */
INT X(tensor_sz)(const tensor *sz)
{
    INT total = 1;
    int d;

    if (FINITE_RNK(sz->rnk)) {
        for (d = 0; d < sz->rnk; ++d)
            total *= sz->dims[d].n;
        return total;
    }
    return 0;
}
/* Feed the tensor into the md5 state p: first the rank, then (for
   finite rank) n, is and os of every dimension in order. */
void X(tensor_md5)(md5 *p, const tensor *t)
{
    int k = 0;

    X(md5int)(p, t->rnk);
    if (!FINITE_RNK(t->rnk))
        return;

    while (k < t->rnk) {
        const iodim *d = &t->dims[k];
        X(md5INT)(p, d->n);
        X(md5INT)(p, d->is);
        X(md5INT)(p, d->os);
        ++k;
    }
}
/* View a tensor of rank <= 1 as a rank-1 tensor.  For rank 1 the single
   dimension's n, is and os are copied out; otherwise the result is the
   trivial dimension n = 1, is = os = 0.  Always returns 1. */
int X(tensor_tornk1)(const tensor *t, INT *n, INT *is, INT *os)
{
    A(t->rnk <= 1);

    if (t->rnk != 1) {
        *n = 1;
        *os = 0;
        *is = 0;
        return 1;
    }

    *n = t->dims[0].n;
    *is = t->dims[0].is;
    *os = t->dims[0].os;
    return 1;
}
/* Print the tensor through the printer p.  A finite-rank tensor is
   written as a parenthesised, space-separated list of "(n is os)"
   triples; any other tensor is written as "rank-minfty". */
void X(tensor_print)(const tensor *x, printer *p)
{
    int i;

    if (!FINITE_RNK(x->rnk)) {
        p->print(p, "rank-minfty");
        return;
    }

    p->print(p, "(");
    for (i = 0; i < x->rnk; ++i) {
        const iodim *d = &x->dims[i];
        /* separator before every entry except the first */
        p->print(p, "%s(%D %D %D)",
                 i == 0 ? "" : " ",
                 d->n, d->is, d->os);
    }
    p->print(p, ")");
}

View File

@@ -0,0 +1,36 @@
/*
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include "kernel/ifftw.h"
tensor *X(mktensor_0d)(void)
{
return X(mktensor(0));
}
/* Create a rank-1 tensor whose single dimension has length n, input
   stride is and output stride os. */
tensor *X(mktensor_1d)(INT n, INT is, INT os)
{
    tensor *x = X(mktensor)(1);
    iodim *d = x->dims;

    d[0].n = n;
    d[0].is = is;
    d[0].os = os;
    return x;
}

View File

@@ -0,0 +1,53 @@
/*
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include "kernel/ifftw.h"
/* Create a rank-2 tensor; dimension k gets length nk, input stride isk
   and output stride osk. */
tensor *X(mktensor_2d)(INT n0, INT is0, INT os0,
                       INT n1, INT is1, INT os1)
{
    tensor *x = X(mktensor)(2);
    iodim *d = x->dims;

    d[0].n = n0;  d[0].is = is0;  d[0].os = os0;
    d[1].n = n1;  d[1].is = is1;  d[1].os = os1;
    return x;
}
/* Create a rank-3 tensor; dimension k gets length nk, input stride isk
   and output stride osk. */
tensor *X(mktensor_3d)(INT n0, INT is0, INT os0,
                       INT n1, INT is1, INT os1,
                       INT n2, INT is2, INT os2)
{
    tensor *x = X(mktensor)(3);
    iodim *d = x->dims;

    d[0].n = n0;  d[0].is = is0;  d[0].os = os0;
    d[1].n = n1;  d[1].is = is1;  d[1].os = os1;
    d[2].n = n2;  d[2].is = is2;  d[2].os = os2;
    return x;
}

View File

@@ -0,0 +1,72 @@
/*
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include "kernel/ifftw.h"
/* Currently, mktensor_4d and mktensor_5d are only used in the MPI
routines, where very complicated transpositions are required.
Therefore we split them into a separate source file. */
/* Create a rank-4 tensor; dimension k gets length nk, input stride isk
   and output stride osk. */
tensor *X(mktensor_4d)(INT n0, INT is0, INT os0,
                       INT n1, INT is1, INT os1,
                       INT n2, INT is2, INT os2,
                       INT n3, INT is3, INT os3)
{
    tensor *x = X(mktensor)(4);
    iodim *d = x->dims;

    d[0].n = n0;  d[0].is = is0;  d[0].os = os0;
    d[1].n = n1;  d[1].is = is1;  d[1].os = os1;
    d[2].n = n2;  d[2].is = is2;  d[2].os = os2;
    d[3].n = n3;  d[3].is = is3;  d[3].os = os3;
    return x;
}
/* Create a rank-5 tensor; dimension k gets length nk, input stride isk
   and output stride osk. */
tensor *X(mktensor_5d)(INT n0, INT is0, INT os0,
                       INT n1, INT is1, INT os1,
                       INT n2, INT is2, INT os2,
                       INT n3, INT is3, INT os3,
                       INT n4, INT is4, INT os4)
{
    tensor *x = X(mktensor)(5);
    iodim *d = x->dims;

    d[0].n = n0;  d[0].is = is0;  d[0].os = os0;
    d[1].n = n1;  d[1].is = is1;  d[1].os = os1;
    d[2].n = n2;  d[2].is = is2;  d[2].os = os2;
    d[3].n = n3;  d[3].is = is3;  d[3].os = os3;
    d[4].n = n4;  d[4].is = is4;  d[4].os = os4;
    return x;
}

View File

@@ -0,0 +1,104 @@
/*
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include "kernel/ifftw.h"
/* Largest offset reachable through the tensor: the sum over all
   dimensions of (n - 1) * |stride|, computed separately for the input
   and the output strides; the larger of the two sums is returned.
   The rank must be finite. */
INT X(tensor_max_index)(const tensor *sz)
{
    INT in_extent = 0, out_extent = 0;
    int d;

    A(FINITE_RNK(sz->rnk));
    for (d = 0; d < sz->rnk; ++d) {
        const INT span = sz->dims[d].n - 1;
        in_extent += span * X(iabs)(sz->dims[d].is);
        out_extent += span * X(iabs)(sz->dims[d].os);
    }
    return X(imax)(in_extent, out_extent);
}
/* Function-body template: expands to a complete braced body that
   returns the smallest |stride| over all dimensions of sz, where xs
   names the stride field to inspect (is or os).  Returns 0 for a
   rank-0 tensor; the rank must be finite. */
#define tensor_min_xstride(sz, xs) { \
A(FINITE_RNK(sz->rnk)); \
if (sz->rnk == 0) return 0; \
else { \
int i; \
INT s = X(iabs)(sz->dims[0].xs); \
for (i = 1; i < sz->rnk; ++i) \
s = X(imin)(s, X(iabs)(sz->dims[i].xs)); \
return s; \
} \
}
/* smallest absolute input stride / output stride of sz; the bodies
   come from the template above */
INT X(tensor_min_istride)(const tensor *sz) tensor_min_xstride(sz, is)
INT X(tensor_min_ostride)(const tensor *sz) tensor_min_xstride(sz, os)
/* Smallest absolute stride of sz, taken over both the input and the
   output strides. */
INT X(tensor_min_stride)(const tensor *sz)
{
    const INT min_in = X(tensor_min_istride)(sz);
    const INT min_out = X(tensor_min_ostride)(sz);

    return X(imin)(min_in, min_out);
}
/* Return 1 iff every dimension of sz has identical input and output
   strides (trivially true for rank 0).  The rank must be finite. */
int X(tensor_inplace_strides)(const tensor *sz)
{
    int d = 0;

    A(FINITE_RNK(sz->rnk));
    while (d < sz->rnk) {
        if (sz->dims[d].is != sz->dims[d].os)
            return 0;
        ++d;
    }
    return 1;
}
int X(tensor_inplace_strides2)(const tensor *a, const tensor *b)
{
return X(tensor_inplace_strides(a)) && X(tensor_inplace_strides(b));
}
/* Return 1 iff *any* stride of sz would decrease under
   X(tensor_copy_inplace)(sz, k): with INPLACE_OS that is any dimension
   with os < is, with any other k any dimension with is < os.
   A tensor of non-finite rank never reports a decrease. */
static int tensor_strides_decrease(const tensor *sz, inplace_kind k)
{
    const INT dir = (k == INPLACE_OS ? (INT)1 : (INT)-1);
    int d;

    if (!FINITE_RNK(sz->rnk))
        return 0;

    for (d = 0; d < sz->rnk; ++d) {
        const INT delta = sz->dims[d].os - sz->dims[d].is;
        if (delta * dir < 0)
            return 1;
    }
    return 0;
}
/* Return true (1) iff *any* strides of sz decrease when we apply
   X(tensor_copy_inplace)(sz, k), *or* if *all* strides of sz are
   unchanged but *any* strides of vecsz decrease.  This is used in
   indirect.c to determine whether to use INPLACE_IS or INPLACE_OS.

   Note: X(tensor_strides_decrease)(sz, vecsz, INPLACE_IS)
         || X(tensor_strides_decrease)(sz, vecsz, INPLACE_OS)
         || X(tensor_inplace_strides2)(sz, vecsz)
   must always be true. */
int X(tensor_strides_decrease)(const tensor *sz, const tensor *vecsz,
                               inplace_kind k)
{
    if (tensor_strides_decrease(sz, k))
        return 1;
    /* vecsz only matters when sz itself is already in-place */
    if (!X(tensor_inplace_strides)(sz))
        return 0;
    return tensor_strides_decrease(vecsz, k);
}

View File

@@ -0,0 +1,92 @@
/*
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include "kernel/ifftw.h"
/* Copy rnk iodim entries from src to dst; does nothing when rnk is not
   a finite rank. */
static void dimcpy(iodim *dst, const iodim *src, int rnk)
{
    int k;

    if (!FINITE_RNK(rnk))
        return;
    for (k = 0; k < rnk; ++k)
        dst[k] = src[k];
}
/* Return a newly allocated tensor with the same rank and dimensions
   as sz. */
tensor *X(tensor_copy)(const tensor *sz)
{
    tensor *dup = X(mktensor)(sz->rnk);

    dimcpy(dup->dims, sz->dims, sz->rnk);
    return dup;
}
/* Like X(tensor_copy), but makes the strides of the copy in-place:
   is = os in every dimension when k == INPLACE_OS, os = is otherwise
   (i.e. for INPLACE_IS). */
tensor *X(tensor_copy_inplace)(const tensor *sz, inplace_kind k)
{
    tensor *x = X(tensor_copy)(sz);
    int i;

    if (!FINITE_RNK(x->rnk))
        return x;

    for (i = 0; i < x->rnk; ++i) {
        iodim *d = &x->dims[i];
        if (k == INPLACE_OS)
            d->is = d->os;
        else
            d->os = d->is;
    }
    return x;
}
/* Like X(tensor_copy), but copy all of the dimensions *except*
   except_dim.  sz must have finite rank >= 1 and except_dim must be a
   valid dimension index of sz (0 <= except_dim < sz->rnk).  The result
   has rank sz->rnk - 1. */
tensor *X(tensor_copy_except)(const tensor *sz, int except_dim)
{
    tensor *x;

    /* a negative index would make the second dimcpy below address
       memory before the start of both dims arrays */
    A(FINITE_RNK(sz->rnk) && sz->rnk >= 1
      && except_dim >= 0 && except_dim < sz->rnk);
    x = X(mktensor)(sz->rnk - 1);
    /* dimensions before except_dim ... */
    dimcpy(x->dims, sz->dims, except_dim);
    /* ... and the ones after it, shifted down by one slot */
    dimcpy(x->dims + except_dim, sz->dims + except_dim + 1,
           x->rnk - except_dim);
    return x;
}
/* Like X(tensor_copy), but copy only rnk dimensions starting
   with start_dim.  sz must have finite rank and the range
   [start_dim, start_dim + rnk) must lie within sz's dimensions. */
tensor *X(tensor_copy_sub)(const tensor *sz, int start_dim, int rnk)
{
    tensor *x;

    /* check both ends of the range; the upper bound is written as a
       subtraction so that it cannot overflow for large rnk */
    A(FINITE_RNK(sz->rnk) && start_dim >= 0 && rnk >= 0
      && start_dim <= sz->rnk && rnk <= sz->rnk - start_dim);
    x = X(mktensor)(rnk);
    dimcpy(x->dims, sz->dims + start_dim, rnk);
    return x;
}
/* Return a new tensor holding the dimensions of a followed by those of
   b.  If either operand has non-finite rank the result is a
   RNK_MINFTY tensor. */
tensor *X(tensor_append)(const tensor *a, const tensor *b)
{
    tensor *x;

    if (FINITE_RNK(a->rnk) && FINITE_RNK(b->rnk)) {
        x = X(mktensor)(a->rnk + b->rnk);
        dimcpy(x->dims, a->dims, a->rnk);
        dimcpy(x->dims + a->rnk, b->dims, b->rnk);
    } else {
        x = X(mktensor)(RNK_MINFTY);
    }
    return x;
}

View File

@@ -0,0 +1,215 @@
/*
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include "kernel/ifftw.h"
/* Sign of x: -1 if negative, 0 if zero, 1 if positive. */
static int signof(INT x)
{
    return (x > 0) - (x < 0);
}
/* Total order among iodim's.  Keys, in decreasing priority:
   1. min(|is|, |os|), descending;
   2. |is|, descending;
   3. |os|, descending;
   4. n, ascending.
   Returns -1, 0 or 1 in the usual comparator convention. */
int X(dimcmp)(const iodim *a, const iodim *b)
{
    const INT a_is = X(iabs)(a->is), b_is = X(iabs)(b->is);
    const INT a_os = X(iabs)(a->os), b_os = X(iabs)(b->os);
    const INT a_min = X(imin)(a_is, a_os), b_min = X(imin)(b_is, b_os);

    if (a_min != b_min)
        return signof(b_min - a_min);
    if (a_is != b_is)
        return signof(b_is - a_is);
    if (a_os != b_os)
        return signof(b_os - a_os);
    return signof(a->n - b->n);
}
static void canonicalize(tensor *x)
{
if (x->rnk > 1) {
qsort(x->dims, (unsigned)x->rnk, sizeof(iodim),
(int (*)(const void *, const void *))X(dimcmp));
}
}
/* Comparator that orders iodims by descending |is|. */
static int compare_by_istride(const iodim *a, const iodim *b)
{
    const INT a_is = X(iabs)(a->is);
    const INT b_is = X(iabs)(b->is);

    return signof(b_is - a_is);
}
/* Return a new tensor containing, in their original order, only the
   dimensions of sz whose length is not 1.  sz must have finite rank
   and every dimension length must be positive. */
static tensor *really_compress(const tensor *sz)
{
    int src, kept = 0;
    tensor *x;

    A(FINITE_RNK(sz->rnk));

    /* first pass: count the dimensions that survive */
    for (src = 0; src < sz->rnk; ++src) {
        A(sz->dims[src].n > 0);
        kept += (sz->dims[src].n != 1);
    }

    /* second pass: copy them over */
    x = X(mktensor)(kept);
    kept = 0;
    for (src = 0; src < sz->rnk; ++src) {
        if (sz->dims[src].n == 1)
            continue;
        x->dims[kept] = sz->dims[src];
        ++kept;
    }
    return x;
}
/* Like tensor_copy, but drop the n == 1 dimensions, which never affect
   any transform or transform vector, and sort what remains into the
   canonical order of decreasing strides (see X(dimcmp) for the exact
   definition).  In general, processing a loop/array in order of
   decreasing stride will improve locality.  Both forward and backward
   traversal of the tensor are considered e.g. by vrank-geq1, so
   sorting in increasing vs. decreasing order is not really
   important. */
tensor *X(tensor_compress)(const tensor *sz)
{
    tensor *compact = really_compress(sz);

    canonicalize(compact);
    return compact;
}
/* Return whether dimensions a and b together form one effective
   contiguous 1d array, i.e. a's input and output strides both equal
   b's corresponding stride times b's length.  Assumes that
   a.is >= b.is. */
static int strides_contig(iodim *a, iodim *b)
{
    if (a->is != b->is * b->n)
        return 0;
    return a->os == b->os * b->n;
}
/* Like tensor_compress, but also compress into one dimension any
group of dimensions that form a contiguous block of indices with
some stride. (This can safely be done for transform vector sizes.) */
tensor *X(tensor_compress_contiguous)(const tensor *sz)
{
int i, rnk;
tensor *sz2, *x;
if (X(tensor_sz)(sz) == 0)
return X(mktensor)(RNK_MINFTY);
sz2 = really_compress(sz);
A(FINITE_RNK(sz2->rnk));
if (sz2->rnk <= 1) { /* nothing to compress. */
if (0) {
/* this call is redundant, because "sz->rnk <= 1" implies
that the tensor is already canonical, but I am writing
it explicitly because "logically" we need to canonicalize
the tensor before returning. */
canonicalize(sz2);
}
return sz2;
}
/* sort in descending order of |istride|, so that compressible
dimensions appear contigously */
qsort(sz2->dims, (unsigned)sz2->rnk, sizeof(iodim),
(int (*)(const void *, const void *))compare_by_istride);
/* compute what the rank will be after compression */
for (i = rnk = 1; i < sz2->rnk; ++i)
if (!strides_contig(sz2->dims + i - 1, sz2->dims + i))
++rnk;
/* merge adjacent dimensions whenever possible */
x = X(mktensor)(rnk);
x->dims[0] = sz2->dims[0];
for (i = rnk = 1; i < sz2->rnk; ++i) {
if (strides_contig(sz2->dims + i - 1, sz2->dims + i)) {
x->dims[rnk - 1].n *= sz2->dims[i].n;
x->dims[rnk - 1].is = sz2->dims[i].is;
x->dims[rnk - 1].os = sz2->dims[i].os;
} else {
A(rnk < x->rnk);
x->dims[rnk++] = sz2->dims[i];
}
}
X(tensor_destroy)(sz2);
/* reduce to canonical form */
canonicalize(x);
return x;
}
/* The inverse of X(tensor_append): splits sz into tensor *a, holding
   its first arnk dimensions, followed by tensor *b, holding the
   remaining sz->rnk - arnk dimensions.  Both outputs are newly
   allocated. */
void X(tensor_split)(const tensor *sz, tensor **a, int arnk, tensor **b)
{
    int brnk;

    A(FINITE_RNK(sz->rnk) && FINITE_RNK(arnk));
    *a = X(tensor_copy_sub)(sz, 0, arnk);
    brnk = sz->rnk - arnk;
    *b = X(tensor_copy_sub)(sz, arnk, brnk);
}
/* TRUE if the two tensors are equal */
int X(tensor_equal)(const tensor *a, const tensor *b)
{
if (a->rnk != b->rnk)
return 0;
if (FINITE_RNK(a->rnk)) {
int i;
for (i = 0; i < a->rnk; ++i)
if (0
|| a->dims[i].n != b->dims[i].n
|| a->dims[i].is != b->dims[i].is
|| a->dims[i].os != b->dims[i].os
)
return 0;
}
return 1;
}
/* TRUE if the sets of input and output locations described by
   (append sz vecsz) are the same.  The combined tensor is rewritten
   once using only its input strides and once using only its output
   strides; the two are equal as location sets iff their
   contiguous-compressed canonical forms are equal. */
int X(tensor_inplace_locations)(const tensor *sz, const tensor *vecsz)
{
    tensor *all = X(tensor_append)(sz, vecsz);
    tensor *by_is = X(tensor_copy_inplace)(all, INPLACE_IS);
    tensor *by_os = X(tensor_copy_inplace)(all, INPLACE_OS);
    tensor *by_is_c = X(tensor_compress_contiguous)(by_is);
    tensor *by_os_c = X(tensor_compress_contiguous)(by_os);
    int same = X(tensor_equal)(by_is_c, by_os_c);

    X(tensor_destroy)(all);
    X(tensor_destroy4)(by_is, by_os, by_is_c, by_os_c);
    return same;
}

View File

@@ -0,0 +1,34 @@
/*
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include "kernel/ifftw.h"
/* Convenience wrapper: destroy two tensors, a then b. */
void X(tensor_destroy2)(tensor *a, tensor *b)
{
X(tensor_destroy)(a);
X(tensor_destroy)(b);
}
/* Convenience wrapper: destroy four tensors, in the order a, b, c, d. */
void X(tensor_destroy4)(tensor *a, tensor *b, tensor *c, tensor *d)
{
    X(tensor_destroy)(a);
    X(tensor_destroy)(b);
    X(tensor_destroy)(c);
    X(tensor_destroy)(d);
}

View File

@@ -0,0 +1,36 @@
/*
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include "kernel/ifftw.h"
/* Sanity check on a tensor: returns 0 if the rank is negative or if,
   for a finite rank, any dimension has a negative length; returns 1
   otherwise. */
int X(tensor_kosherp)(const tensor *x)
{
    int d = 0;

    if (x->rnk < 0)
        return 0;
    if (!FINITE_RNK(x->rnk))
        return 1;

    while (d < x->rnk) {
        if (x->dims[d].n < 0)
            return 0;
        ++d;
    }
    return 1;
}

View File

@@ -0,0 +1,53 @@
/*
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
/* out of place 2D copy routines */
#include "kernel/ifftw.h"
/* Cover the rectangle [n0l, n0u) x [n1l, n1u) with tiles whose extent
   is at most tilesz in each direction, calling f(..., args) once per
   tile.  The longer side is halved repeatedly: the lower half is
   handled by a recursive call and the upper half by continuing the
   loop. */
void X(tile2d)(INT n0l, INT n0u, INT n1l, INT n1u, INT tilesz,
               void (*f)(INT n0l, INT n0u, INT n1l, INT n1u, void *args),
               void *args)
{
    A(tilesz > 0); /* infinite loops otherwise */

    for (;;) {
        const INT d0 = n0u - n0l;
        const INT d1 = n1u - n1l;

        if (d0 >= d1 && d0 > tilesz) {
            const INT n0m = (n0u + n0l) / 2;
            X(tile2d)(n0l, n0m, n1l, n1u, tilesz, f, args);
            n0l = n0m;
        } else if (/* d1 >= d0 && */ d1 > tilesz) {
            const INT n1m = (n1u + n1l) / 2;
            X(tile2d)(n0l, n0u, n1l, n1m, tilesz, f, args);
            n1l = n1m;
        } else {
            /* small enough in both directions: process this tile */
            f(n0l, n0u, n1l, n1u, args);
            return;
        }
    }
}
/* Tile edge length for tiles of vl-component elements such that
   how_many_tiles_in_cache square tiles fit in CACHESIZE: the integer
   square root of CACHESIZE / (sizeof(R) * vl * how_many_tiles_in_cache). */
INT X(compute_tilesz)(INT vl, int how_many_tiles_in_cache)
{
    const INT divisor =
        ((INT)sizeof(R)) * vl * (INT)how_many_tiles_in_cache;

    return X(isqrt)(CACHESIZE / divisor);
}

194
fftw-3.3.10/kernel/timer.c Normal file
View File

@@ -0,0 +1,194 @@
/*
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include "kernel/ifftw.h"
#include <limits.h>
#ifdef HAVE_UNISTD_H
# include <unistd.h>
#endif
#ifndef WITH_SLOW_TIMER
# include "cycle.h"
#endif
#ifndef FFTW_TIME_LIMIT
#define FFTW_TIME_LIMIT 2.0 /* don't run for more than two seconds */
#endif
/* The following code is disabled for now, because it seems to
require that we #include <windows.h> in ifftw.h to
typedef LARGE_INTEGER crude_time, and this pulls in the whole
Windows universe and leads to namespace conflicts (unless
we did some hack like assuming sizeof(LARGE_INTEGER) == sizeof(long long)).
gettimeofday is provided by MinGW, which we use to cross-compile
FFTW for Windows, and this seems to work well enough. */
/* Three alternative implementations of the crude wall-clock timer.
   Each branch provides X(get_crude_time)() (take a timestamp), a static
   elapsed_since(t0) (seconds elapsed since t0, as a double) and
   TIME_MIN_SEC. */
/* Branch 1: Windows performance counter -- compiled out by the "0 &&". */
#if 0 && (defined(__WIN32__) || defined(_WIN32) || defined(_WIN64))
crude_time X(get_crude_time)(void)
{
crude_time tv;
QueryPerformanceCounter(&tv);
return tv;
}
/* counter difference divided by the counter frequency */
static double elapsed_since(crude_time t0)
{
crude_time t1, freq;
QueryPerformanceCounter(&t1);
QueryPerformanceFrequency(&freq);
return (((double) (t1.QuadPart - t0.QuadPart))) /
((double) freq.QuadPart);
}
# define TIME_MIN_SEC 1.0e-2
/* Branch 2: gettimeofday(); crude_time has tv_sec / tv_usec fields. */
#elif defined(HAVE_GETTIMEOFDAY)
crude_time X(get_crude_time)(void)
{
crude_time tv;
gettimeofday(&tv, 0);
return tv;
}
/* seconds between two timestamps: whole seconds plus microseconds
   scaled by 1e-6 */
#define elapsed_sec(t1,t0) ((double)(t1.tv_sec - t0.tv_sec) + \
(double)(t1.tv_usec - t0.tv_usec) * 1.0E-6)
static double elapsed_since(crude_time t0)
{
crude_time t1;
gettimeofday(&t1, 0);
return elapsed_sec(t1, t0);
}
# define TIME_MIN_SEC 1.0e-3
/* Branch 3: fall back on the standard clock() function. */
#else /* !HAVE_GETTIMEOFDAY */
/* Note that the only system where we are likely to need to fall back
on the clock() function is Windows, for which CLOCKS_PER_SEC is 1000
and thus the clock wraps once every 50 days. This should hopefully
be longer than the time required to create any single plan! */
crude_time X(get_crude_time)(void) { return clock(); }
/* seconds between two clock() readings */
#define elapsed_sec(t1,t0) ((double) ((t1) - (t0)) / CLOCKS_PER_SEC)
static double elapsed_since(crude_time t0)
{
return elapsed_sec(clock(), t0);
}
# define TIME_MIN_SEC 2.0e-1 /* from fftw2 */
#endif /* !HAVE_GETTIMEOFDAY */
/* Time elapsed since t0 as reported by the crude timer.  If the planner
   has a cost_hook installed, the raw value is passed through it
   (together with the problem and COST_MAX) and the hook's result is
   returned instead. */
double X(elapsed_since)(const planner *plnr, const problem *p, crude_time t0)
{
    const double raw = elapsed_since(t0);

    if (!plnr->cost_hook)
        return raw;
    return plnr->cost_hook(p, raw, COST_MAX);
}
/* With WITH_SLOW_TIMER the crude timer stands in for the tick counter:
   ticks/getticks/elapsed are mapped onto crude_time, X(get_crude_time)
   and elapsed_sec, and TIME_MIN / TIME_REPEAT are set accordingly. */
#ifdef WITH_SLOW_TIMER
/* excruciatingly slow; only use this if there is no choice! */
typedef crude_time ticks;
# define getticks X(get_crude_time)
# define elapsed(t1,t0) elapsed_sec(t1,t0)
# define TIME_MIN TIME_MIN_SEC
# define TIME_REPEAT 4 /* from fftw2 */
# define HAVE_TICK_COUNTER
#endif
#ifdef HAVE_TICK_COUNTER
/* Defaults used when the settings above did not apply.  TIME_MIN is the
   smallest measurement X(measure_execution_time) accepts (in whatever
   unit elapsed() returns -- presumably ticks from cycle.h; confirm).
   TIME_REPEAT is how many times each measurement is repeated. */
# ifndef TIME_MIN
# define TIME_MIN 100.0
# endif
# ifndef TIME_REPEAT
# define TIME_REPEAT 8
# endif
/* Time iter back-to-back executions of the plan's solve method on
   problem p and return the elapsed time as reported by elapsed(). */
static double measure(plan *pln, const problem *p, int iter)
{
    ticks start, stop;
    int remaining = iter;

    start = getticks();
    while (remaining > 0) {
        pln->adt->solve(pln, p);
        --remaining;
    }
    stop = getticks();
    return elapsed(stop, start);
}
/* Measure how long one execution of plan pln on problem p takes.

   The plan is awakened and the problem zeroed, then the plan is timed
   in batches of iter = 1, 2, 4, ... executions.  Each batch size is
   measured up to TIME_REPEAT times (fewer if FFTW_TIME_LIMIT is
   exceeded) and the minimum is kept.  As soon as that minimum reaches
   TIME_MIN, the plan is put back to sleep and the minimum divided by
   iter is returned.  If the planner has a cost_hook, every measurement
   is passed through it.  A negative measurement, or running out of
   batch sizes, restarts the whole procedure from iter = 1. */
double X(measure_execution_time)(const planner *plnr,
                                 plan *pln, const problem *p)
{
    int iter;
    int repeat;

    X(plan_awake)(pln, AWAKE_ZERO);
    p->adt->zero(p);

 start_over:
    for (iter = 1; ; iter *= 2) {
        double tmin = 0;
        int first = 1;
        crude_time begin = X(get_crude_time)();

        /* repeat the measurement TIME_REPEAT times */
        for (repeat = 0; repeat < TIME_REPEAT; ++repeat) {
            double t = measure(pln, p, iter);

            if (plnr->cost_hook)
                t = plnr->cost_hook(p, t, COST_MAX);
            if (t < 0)
                goto start_over;

            if (first || t < tmin)
                tmin = t;
            first = 0;

            /* do not run for too long */
            if (X(elapsed_since)(plnr, p, begin) > FFTW_TIME_LIMIT)
                break;
        }

        if (tmin >= TIME_MIN) {
            X(plan_awake)(pln, SLEEPY);
            return tmin / (double) iter;
        }

        /* stop before iter *= 2 would overflow a signed int (undefined
           behaviour) instead of relying on iter wrapping around to 0 */
        if (iter > INT_MAX / 2)
            break;
    }
    goto start_over; /* may happen if timer is screwed up */
}
#else /* no cycle counter */
/* Fallback used when no tick counter is available: nothing is measured
   and -1.0 is returned.  The arguments are only marked as unused. */
double X(measure_execution_time)(const planner *plnr,
plan *pln, const problem *p)
{
UNUSED(plnr);
UNUSED(p);
UNUSED(pln);
return -1.0;
}
#endif

View File

@@ -0,0 +1,191 @@
/*
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include "kernel/ifftw.h"
/* In-place square transposition, iterative.  For every pair
   0 <= i0 < i1 < n the element at I[i1 * s0 + i0 * s1] is exchanged
   with the one at I[i1 * s1 + i0 * s0]; each element consists of vl
   consecutive R values.  vl == 1 and vl == 2 have dedicated loops. */
void X(transpose)(R *I, INT n, INT s0, INT s1, INT vl)
{
    INT i0, i1, v;

    if (vl == 1) {
        for (i1 = 1; i1 < n; ++i1) {
            for (i0 = 0; i0 < i1; ++i0) {
                const INT p = i1 * s0 + i0 * s1;
                const INT q = i1 * s1 + i0 * s0;
                const R x0 = I[p];
                const R y0 = I[q];
                I[q] = x0;
                I[p] = y0;
            }
        }
    } else if (vl == 2) {
        for (i1 = 1; i1 < n; ++i1) {
            for (i0 = 0; i0 < i1; ++i0) {
                const INT p = i1 * s0 + i0 * s1;
                const INT q = i1 * s1 + i0 * s0;
                const R x0 = I[p];
                const R x1 = I[p + 1];
                const R y0 = I[q];
                const R y1 = I[q + 1];
                I[q] = x0;
                I[q + 1] = x1;
                I[p] = y0;
                I[p + 1] = y1;
            }
        }
    } else {
        for (i1 = 1; i1 < n; ++i1) {
            for (i0 = 0; i0 < i1; ++i0) {
                const INT p = i1 * s0 + i0 * s1;
                const INT q = i1 * s1 + i0 * s0;
                for (v = 0; v < vl; ++v) {
                    const R x0 = I[p + v];
                    const R y0 = I[q + v];
                    I[q + v] = x0;
                    I[p + v] = y0;
                }
            }
        }
    }
}
/* state handed to the tile callbacks dotile() and dotile_buf()
   through their void *args parameter */
struct transpose_closure {
/* base pointer the callbacks index from; set by transpose_rec()
   before each X(tile2d) call */
R *I;
/* s0, s1: the two strides; vl: number of consecutive elements moved
   per index pair; tilesz: value passed on to X(tile2d) */
INT s0, s1, vl, tilesz;
/* scratch buffers read and written by dotile_buf(); set to 0 by
   X(transpose_tiled), which does not use them */
R *buf0, *buf1;
};
/*
 * Tile callback working directly on the array held in the closure.
 *
 * args points to a struct transpose_closure.  For every i1 in
 * [n1l, n1u) and i0 in [n0l, n0u) the vl elements starting at
 * I[i1 * s0 + i0 * s1] are exchanged with the vl elements starting
 * at I[i1 * s1 + i0 * s0].
 */
static void dotile(INT n0l, INT n0u, INT n1l, INT n1u, void *args)
{
    const struct transpose_closure *cl =
        (const struct transpose_closure *)args;
    R *base = cl->I;
    const INT s0 = cl->s0;
    const INT s1 = cl->s1;
    const INT vl = cl->vl;
    INT row, col, v;

    if (vl == 1) {
        for (row = n1l; row < n1u; ++row) {
            for (col = n0l; col < n0u; ++col) {
                R *here = base + (row * s0 + col * s1);
                R *there = base + (row * s1 + col * s0);
                const R from_here = here[0];
                const R from_there = there[0];

                there[0] = from_here;
                here[0] = from_there;
            }
        }
    } else if (vl == 2) {
        for (row = n1l; row < n1u; ++row) {
            for (col = n0l; col < n0u; ++col) {
                R *here = base + (row * s0 + col * s1);
                R *there = base + (row * s1 + col * s0);
                /* load both pairs before storing either */
                const R here0 = here[0];
                const R here1 = here[1];
                const R there0 = there[0];
                const R there1 = there[1];

                there[0] = here0;
                there[1] = here1;
                here[0] = there0;
                here[1] = there1;
            }
        }
    } else {
        for (row = n1l; row < n1u; ++row) {
            for (col = n0l; col < n0u; ++col) {
                R *here = base + (row * s0 + col * s1);
                R *there = base + (row * s1 + col * s0);

                for (v = 0; v < vl; ++v) {
                    const R from_here = here[v];
                    const R from_there = there[v];

                    there[v] = from_here;
                    here[v] = from_there;
                }
            }
        }
    }
}
/*
 * Tile callback that goes through the closure's scratch buffers.
 *
 * args points to a struct transpose_closure.  The tile starting at
 * I + n0l * s0 + n1l * s1 is passed to X(cpy2d_ci) together with
 * buf0, and the mirrored tile starting at I + n0l * s1 + n1l * s0
 * together with buf1; X(cpy2d_co) is then called with buf1 and the
 * first tile, and with buf0 and the mirrored tile.
 * NOTE(review): presumably the copy routines take (source,
 * destination, ...) so the two tiles end up exchanged -- confirm
 * against X(cpy2d_ci) / X(cpy2d_co).
 */
static void dotile_buf(INT n0l, INT n0u, INT n1l, INT n1u, void *args)
{
    struct transpose_closure *cl = (struct transpose_closure *)args;
    const INT s0 = cl->s0;
    const INT s1 = cl->s1;
    const INT vl = cl->vl;
    const INT len0 = n0u - n0l;
    const INT len1 = n1u - n1l;
    const INT bufstride = vl * len0;   /* second-dimension stride in the buffers */
    R *tile = cl->I + n0l * s0 + n1l * s1;
    R *mirror = cl->I + n0l * s1 + n1l * s0;

    X(cpy2d_ci)(tile, cl->buf0,
                len0, s0, vl,
                len1, s1, bufstride,
                vl);
    X(cpy2d_ci)(mirror, cl->buf1,
                len0, s1, vl,
                len1, s0, bufstride,
                vl);
    X(cpy2d_co)(cl->buf1, tile,
                len0, vl, s0,
                len1, bufstride, s1,
                vl);
    X(cpy2d_co)(cl->buf0, mirror,
                len0, vl, s1,
                len1, bufstride, s0,
                vl);
}
/*
 * Divide-and-conquer driver for the tiled transposes.
 *
 * While n > 1 the problem is split at n2 = n / 2: X(tile2d) is run
 * over the index ranges [0, n2) x [n2, n) with callback f, the
 * leading n2 x n2 part is handled by a recursive call, and the
 * remaining (n - n2) part is handled by continuing the loop with I
 * advanced by n2 * (s0 + s1).
 */
static void transpose_rec(R *I, INT n,
                          void (*f)(INT n0l, INT n0u, INT n1l, INT n1u,
                                    void *args),
                          struct transpose_closure *k)
{
    while (n > 1) {
        const INT half = n / 2;

        /* the callback indexes relative to the current sub-array */
        k->I = I;
        X(tile2d)(0, half, half, n, k->tilesz, f, k);

        transpose_rec(I, half, f, k);

        /* continue with the trailing block instead of recursing */
        I += half * (k->s0 + k->s1);
        n -= half;
    }
}
/*
 * Tiled in-place transposition: fills a transpose_closure and runs
 * transpose_rec() with dotile() as the tile callback.
 */
void X(transpose_tiled)(R *I, INT n, INT s0, INT s1, INT vl)
{
    struct transpose_closure cl;

    /* dotile() works on the array itself, so no scratch buffers */
    cl.buf0 = 0;
    cl.buf1 = 0;

    cl.vl = vl;
    cl.s1 = s1;
    cl.s0 = s0;

    /* two blocks must be in cache, to be swapped */
    cl.tilesz = X(compute_tilesz)(vl, 2);

    transpose_rec(I, n, dotile, &cl);
}
/*
 * Tiled in-place transposition through two on-stack scratch buffers:
 * fills a transpose_closure and runs transpose_rec() with
 * dotile_buf() as the tile callback.
 */
void X(transpose_tiledbuf)(R *I, INT n, INT s0, INT s1, INT vl)
{
    /* Assume that the rows of I conflict into the same cache lines,
       and therefore we don't need to reserve cache space for the
       input.  If the rows don't conflict, there is no reason to use
       tiledbuf at all. */
    R scratch0[CACHESIZE / (2 * sizeof(R))];
    R scratch1[CACHESIZE / (2 * sizeof(R))];
    struct transpose_closure cl;

    cl.buf1 = scratch1;
    cl.buf0 = scratch0;
    cl.vl = vl;
    cl.s1 = s1;
    cl.s0 = s0;
    cl.tilesz = X(compute_tilesz)(vl, 2);

    /* a full tilesz x tilesz tile of vl-tuples must fit in each buffer */
    A(cl.tilesz * cl.tilesz * vl * sizeof(R) <= sizeof(scratch0));
    A(cl.tilesz * cl.tilesz * vl * sizeof(R) <= sizeof(scratch1));

    transpose_rec(I, n, dotile_buf, &cl);
}

234
fftw-3.3.10/kernel/trig.c Normal file
View File

@@ -0,0 +1,234 @@
/*
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
/* trigonometric functions */
#include "kernel/ifftw.h"
#include <math.h>
/*
 * Select the cosine/sine routines (COS, SIN) and the literal-suffix
 * macro KTRIG according to which TRIGREAL_IS_* macro is defined.
 */
#if defined(TRIGREAL_IS_LONG_DOUBLE)
# define COS cosl
# define SIN sinl
# define KTRIG(x) (x##L)
/* declare sinl/cosl by hand when HAVE_DECL_SINL / HAVE_DECL_COSL is
   defined with a zero value */
# if defined(HAVE_DECL_SINL) && !HAVE_DECL_SINL
extern long double sinl(long double x);
# endif
# if defined(HAVE_DECL_COSL) && !HAVE_DECL_COSL
extern long double cosl(long double x);
# endif
#elif defined(TRIGREAL_IS_QUAD)
# define COS cosq
# define SIN sinq
# define KTRIG(x) (x##Q)
/* sinq/cosq are always declared here by hand */
extern __float128 sinq(__float128 x);
extern __float128 cosq(__float128 x);
#else
/* default: the plain cos/sin routines and unsuffixed literals */
# define COS cos
# define SIN sin
# define KTRIG(x) (x)
#endif
/* 2 * pi, written with the literal suffix selected by KTRIG */
static const trigreal K2PI =
KTRIG(6.2831853071795864769252867665590057683943388);
/* (2 * pi * m) / n; the multiplication is performed before the division */
#define by2pi(m, n) ((K2PI * (m)) / (n))
/*
 * Store cos(2*pi*m/n) in out[0] and sin(2*pi*m/n) in out[1].
 *
 * Accuracy is improved by folding the fraction m/n into the range
 * [0..1/8] before it is multiplied by 2 * PI; the folds applied are
 * remembered as bits and undone on the cosine/sine pair afterwards.
 */
static void real_cexp(INT m, INT n, trigreal *out)
{
    const INT quarter = n;    /* one quarter of n once n is scaled by 4 */
    unsigned folds = 0;
    trigreal angle, re, im;

    /* scale both by four so the quarter and eighth marks are integers */
    n *= 4;
    m *= 4;

    if (m < 0)
        m += n;

    /* fold the upper half onto the lower half */
    if (m > n - m) {
        m = n - m;
        folds |= 4;
    }
    /* remove a quarter turn */
    if (m - quarter > 0) {
        m = m - quarter;
        folds |= 2;
    }
    /* reflect about the eighth mark */
    if (m > quarter - m) {
        m = quarter - m;
        folds |= 1;
    }

    angle = by2pi(m, n);
    re = COS(angle);
    im = SIN(angle);

    /* undo the folds, innermost first */
    if (folds & 1) {
        const trigreal old_re = re;
        re = im;
        im = old_re;
    }
    if (folds & 2) {
        const trigreal old_re = re;
        re = -im;
        im = old_re;
    }
    if (folds & 4)
        im = -im;

    out[0] = re;
    out[1] = im;
}
/*
 * Count how many times n has to be divided by 4 (integer division)
 * until it is no longer positive.  Returns 0 when n <= 0.
 */
static INT choose_twshft(INT n)
{
    INT shift;

    for (shift = 0; n > 0; n /= 4)
        ++shift;

    return shift;
}
/*
 * Table-based cexpl: split m into its low bits (m & twmsk) and high
 * bits (m >> twshft), fetch the matching entries of W0 and W1, and
 * store their complex product in res[0] (real) and res[1]
 * (imaginary).  A negative m is first shifted up by p->n.
 */
static void cexpl_sqrtn_table(triggen *p, INT m, trigreal *res)
{
    INT lo, hi;
    trigreal lo_re, lo_im, hi_re, hi_im;

    if (m < 0)
        m += p->n;

    lo = m & p->twmsk;
    hi = m >> p->twshft;

    /* load all four table values before touching res */
    lo_re = p->W0[2 * lo];
    lo_im = p->W0[2 * lo + 1];
    hi_re = p->W1[2 * hi];
    hi_im = p->W1[2 * hi + 1];

    res[0] = hi_re * lo_re - hi_im * lo_im;
    res[1] = hi_im * lo_re + hi_re * lo_im;
}
/*
 * Multiply (xr, xi) by exp(FFT_SIGN * 2*pi*i*m/n), storing the
 * result in res[0] (real) and res[1] (imaginary).  The factor is the
 * complex product of the W0 entry for the low bits of m and the W1
 * entry for the high bits; a negative m is first shifted up by p->n.
 */
static void rotate_sqrtn_table(triggen *p, INT m, R xr, R xi, R *res)
{
    INT lo, hi;
    trigreal lo_re, lo_im, hi_re, hi_im;
    trigreal wr, wi;

    if (m < 0)
        m += p->n;

    lo = m & p->twmsk;
    hi = m >> p->twshft;

    lo_re = p->W0[2 * lo];
    lo_im = p->W0[2 * lo + 1];
    hi_re = p->W1[2 * hi];
    hi_im = p->W1[2 * hi + 1];

    wr = hi_re * lo_re - hi_im * lo_im;
    wi = hi_im * lo_re + hi_re * lo_im;

#if FFT_SIGN == -1
    /* multiply by the conjugate of (wr, wi) */
    res[0] = xr * wr + xi * wi;
    res[1] = xi * wr - xr * wi;
#else
    res[0] = xr * wr - xi * wi;
    res[1] = xi * wr + xr * wi;
#endif
}
/*
 * cexpl implementation without tables: forwards to real_cexp() with
 * the generator's n.
 */
static void cexpl_sincos(triggen *p, INT m, trigreal *res)
{
    const INT n = p->n;

    real_cexp(m, n, res);
}
/* cexp implementation that ignores its inputs and stores 0 in both
   res[0] and res[1] */
static void cexp_zero(triggen *p, INT m, R *res)
{
    UNUSED(m);
    UNUSED(p);

    res[1] = 0;
    res[0] = 0;
}
/* cexpl implementation that ignores its inputs and stores 0 in both
   res[0] and res[1] */
static void cexpl_zero(triggen *p, INT m, trigreal *res)
{
    UNUSED(m);
    UNUSED(p);

    res[1] = 0;
    res[0] = 0;
}
/*
 * cexp implemented on top of cexpl: the pair is computed in trigreal
 * and each component is then cast to R.
 */
static void cexp_generic(triggen *p, INT m, R *res)
{
    trigreal wide[2];

    p->cexpl(p, m, wide);

    res[0] = (R)wide[0];
    res[1] = (R)wide[1];
}
/*
 * rotate implemented on top of cexpl: obtains the pair (tw[0], tw[1])
 * for m and multiplies (xr, xi) by tw[0] + i * FFT_SIGN * tw[1],
 * storing the real part in res[0] and the imaginary part in res[1].
 */
static void rotate_generic(triggen *p, INT m, R xr, R xi, R *res)
{
    trigreal tw[2];

    p->cexpl(p, m, tw);

    res[0] = xr * tw[0] - xi * (FFT_SIGN * tw[1]);
    res[1] = xi * tw[0] + xr * (FFT_SIGN * tw[1]);
}
/*
 * Allocate and set up a trigonometric generator for size n.
 *
 * wakefulness selects how values are produced:
 *   AWAKE_SQRTN_TABLE  two tables are filled with real_cexp(): W0
 *                      holds the entries for 0 .. twradix-1 and W1
 *                      the entries for multiples of twradix; cexpl
 *                      and rotate combine one entry from each.
 *   AWAKE_SINCOS       cexpl calls real_cexp() directly.
 *   AWAKE_ZERO         cexp and cexpl always store zeros.
 *   SLEEPY             not a valid request (asserted).
 *
 * Any of cexp / rotate left unset by the selected mode falls back to
 * a generic version built on cexpl.  The result is released with
 * X(triggen_destroy).
 */
triggen *X(mktriggen)(enum wakefulness wakefulness, INT n)
{
    INT i, n0, n1;
    triggen *p = (triggen *)MALLOC(sizeof(*p), TWIDDLES);

    p->n = n;
    p->W0 = p->W1 = 0;
    /* cexpl is copied into cexp below, so it must never be read while
       still holding whatever MALLOC returned (MALLOC is not shown
       here; assuming it does not zero the block) */
    p->cexpl = 0;
    p->cexp = 0;
    p->rotate = 0;

    switch (wakefulness) {
    case SLEEPY:
        A(0 /* can't happen */);
        break;

    case AWAKE_SQRTN_TABLE: {
        INT twshft = choose_twshft(n);

        p->twshft = twshft;
        p->twradix = ((INT)1) << twshft;
        p->twmsk = p->twradix - 1;

        n0 = p->twradix;
        n1 = (n + n0 - 1) / n0;    /* n / n0, rounded up */

        p->W0 = (trigreal *)MALLOC(n0 * 2 * sizeof(trigreal), TWIDDLES);
        p->W1 = (trigreal *)MALLOC(n1 * 2 * sizeof(trigreal), TWIDDLES);

        for (i = 0; i < n0; ++i)
            real_cexp(i, n, p->W0 + 2 * i);

        for (i = 0; i < n1; ++i)
            real_cexp(i * p->twradix, n, p->W1 + 2 * i);

        p->cexpl = cexpl_sqrtn_table;
        p->rotate = rotate_sqrtn_table;
        break;
    }

    case AWAKE_SINCOS:
        p->cexpl = cexpl_sincos;
        break;

    case AWAKE_ZERO:
        p->cexp = cexp_zero;
        p->cexpl = cexpl_zero;
        break;
    }

    if (!p->cexp) {
        /* when trigreal and R have the same size, cexpl is reused
           directly through a cast; otherwise convert via cexp_generic */
        if (sizeof(trigreal) == sizeof(R))
            p->cexp = (void (*)(triggen *, INT, R *))p->cexpl;
        else
            p->cexp = cexp_generic;
    }

    if (!p->rotate)
        p->rotate = rotate_generic;

    return p;
}
/*
 * Release a generator created by X(mktriggen): both tables are
 * passed to X(ifree0) and the generator itself to X(ifree).
 */
void X(triggen_destroy)(triggen *p)
{
    /* the two tables are independent of each other */
    X(ifree0)(p->W1);
    X(ifree0)(p->W0);

    X(ifree)(p);
}

View File

@@ -0,0 +1,256 @@
/*
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
/* Twiddle manipulation */
#include "kernel/ifftw.h"
#include <math.h>
/* number of buckets in twlist; hash() returns an index below this */
#define HASHSZ 109
/* hash table of known twiddle factors; each bucket is a list of twid
   records linked through their cdr field */
static twid *twlist[HASHSZ];
/*
 * Map the pair (n, r) to a bucket index in [0, HASHSZ).
 */
static INT hash(INT n, INT r)
{
    INT h = (n * 17 + r) % HASHSZ;

    /* Reduce first, then drop the sign: the remainder is strictly
       between -HASHSZ and HASHSZ, so negating it cannot overflow,
       whereas negating the unreduced value overflows when it is the
       most negative INT.  With C99 truncating division this yields
       the same bucket as taking the absolute value first. */
    if (h < 0)
        h = -h;

    return h;
}
/*
 * Compare two twiddle instruction programs; returns 1 when they are
 * equivalent and 0 otherwise.
 *
 * The programs are walked in parallel up to the TW_NEXT terminator.
 * The op and v fields must match at every step (including the
 * terminator); the i field must also match, except for TW_FULL,
 * TW_HALF and TW_NEXT, where it is ignored.
 */
static int equal_instr(const tw_instr *p, const tw_instr *q)
{
    if (p == q)
        return 1;

    while (p->op == q->op) {
        if (p->op == TW_NEXT)
            return (p->v == q->v);    /* p->i is ignored */

        if (p->v != q->v)
            return 0;

        /* p->i is ignored for TW_FULL and TW_HALF */
        if (p->op != TW_FULL && p->op != TW_HALF && p->i != q->i)
            return 0;

        ++p;
        ++q;
    }

    /* the ops differ */
    return 0;
}
/*
 * Return 1 when the existing record t can serve a request for
 * (wakefulness, q, n, r, m): wakefulness, n and r must be equal, the
 * requested m must not exceed t->m, and the instruction programs
 * must be equivalent.  Returns 0 otherwise.
 */
static int ok_twid(const twid *t,
                   enum wakefulness wakefulness,
                   const tw_instr *q, INT n, INT r, INT m)
{
    if (wakefulness != t->wakefulness)
        return 0;
    if (n != t->n)
        return 0;
    if (r != t->r)
        return 0;
    if (m > t->m)
        return 0;

    return equal_instr(t->instr, q) != 0;
}
/*
 * Search the bucket of twlist selected by hash(n, r) for a record
 * accepted by ok_twid().  Returns the first such record, or a null
 * pointer when the bucket has none.
 */
static twid *lookup(enum wakefulness wakefulness,
                    const tw_instr *q, INT n, INT r, INT m)
{
    twid *cur = twlist[hash(n, r)];

    while (cur && !ok_twid(cur, wakefulness, q, n, r, m))
        cur = cur->cdr;

    return cur;
}
/*
 * Walk the instruction program p up to its TW_NEXT terminator and
 * return the number of values one pass over it produces:
 * TW_FULL adds 2 * (r - 1), TW_HALF adds r - 1, TW_CEXP adds 2,
 * TW_COS and TW_SIN add 1 each; any other op adds nothing.
 * The v field of the terminator is stored in *vl.
 */
static INT twlen0(INT r, const tw_instr *p, INT *vl)
{
    INT count = 0;

    A(r > 0);

    while (p->op != TW_NEXT) {
        if (p->op == TW_FULL)
            count += (r - 1) * 2;
        else if (p->op == TW_HALF)
            count += (r - 1);
        else if (p->op == TW_CEXP)
            count += 2;
        else if (p->op == TW_COS || p->op == TW_SIN)
            count += 1;
        ++p;
    }

    *vl = (INT)p->v;
    return count;
}
/*
 * Public wrapper around twlen0(): returns the value count for radix
 * r and program p, discarding the vl value twlen0() also reports.
 */
INT X(twiddle_length)(INT r, const tw_instr *p)
{
    INT unused_vl;
    const INT len = twlen0(r, p, &unused_vl);

    return len;
}
/*
 * Build the table of values described by the instruction program
 * instr for the parameters (n, r, m).
 *
 * A generator is created with X(mktriggen)(wakefulness, n); the
 * program is then run once for every j = 0, vl, 2*vl, ... below m,
 * where vl is the value reported by twlen0().  Each instruction
 * appends to the table:
 *   TW_FULL  the pair for (j + v) * i, for i = 1 .. r-1
 *   TW_HALF  the pair for MULMOD(i, j + v, n), for 1 <= i < r/2 (r odd)
 *   TW_CEXP  the pair for (j + v) * i
 *   TW_COS   the first component of that pair
 *   TW_SIN   the second component of that pair
 * The generator is destroyed before returning.  The returned table
 * is allocated with MALLOC.
 */
static R *compute(enum wakefulness wakefulness,
                  const tw_instr *instr, INT n, INT r, INT m)
{
    triggen *gen = X(mktriggen)(wakefulness, n);
    INT vl, j;
    const INT per_pass = twlen0(r, instr, &vl);
    const tw_instr *ins;
    R *table, *out;

    A(m % vl == 0);

    table = (R *)MALLOC((per_pass * (m / vl)) * sizeof(R), TWIDDLES);
    out = table;

    for (j = 0; j < m; j += vl) {
        for (ins = instr; ins->op != TW_NEXT; ++ins) {
            const INT base = j + (INT)ins->v;

            switch (ins->op) {
            case TW_FULL: {
                INT i;

                for (i = 1; i < r; ++i, out += 2) {
                    const INT arg = base * i;

                    A(arg < n);
                    A(arg > -n);
                    gen->cexp(gen, arg, out);
                }
                break;
            }

            case TW_HALF: {
                INT i;

                A((r % 2) == 1);
                for (i = 1; i + i < r; ++i, out += 2)
                    gen->cexp(gen, MULMOD(i, base, n), out);
                break;
            }

            case TW_COS:
            case TW_SIN: {
                /* compute the full pair, keep only one component */
                R pair[2];
                const INT arg = base * (INT)ins->i;

                A(arg < n);
                A(arg > -n);
                gen->cexp(gen, arg, pair);
                *out++ = (ins->op == TW_COS) ? pair[0] : pair[1];
                break;
            }

            case TW_CEXP: {
                const INT arg = base * (INT)ins->i;

                A(arg < n);
                A(arg > -n);
                gen->cexp(gen, arg, out);
                out += 2;
                break;
            }
            }
        }
    }

    X(triggen_destroy)(gen);
    return table;
}
/*
 * Obtain a twid record for (wakefulness, instr, n, r, m) and store
 * it in *pp.  A matching record found by lookup() is reused with its
 * reference count incremented; otherwise a new record is allocated,
 * its table is built by compute(), and it is pushed onto the front
 * of its twlist bucket with a reference count of 1.
 */
static void mktwiddle(enum wakefulness wakefulness,
                      twid **pp, const tw_instr *instr, INT n, INT r, INT m)
{
    twid *tw = lookup(wakefulness, instr, n, r, m);

    if (tw) {
        ++tw->refcnt;
    } else {
        INT bucket;

        tw = (twid *) MALLOC(sizeof(twid), TWIDDLES);
        tw->wakefulness = wakefulness;
        tw->instr = instr;
        tw->n = n;
        tw->r = r;
        tw->m = m;
        tw->refcnt = 1;
        tw->W = compute(wakefulness, instr, n, r, m);

        /* cons! onto twlist */
        bucket = hash(n, r);
        tw->cdr = twlist[bucket];
        twlist[bucket] = tw;
    }

    *pp = tw;
}
/*
 * Drop one reference to the record in *pp.  When the count reaches
 * zero the record is unlinked from its twlist bucket, its table and
 * the record itself are freed, and *pp is set to 0.  While other
 * references remain, *pp is left unchanged.
 */
static void twiddle_destroy(twid **pp)
{
    twid *victim = *pp;
    twid **link;

    if (--victim->refcnt != 0)
        return;

    /* remove victim from twiddle list */
    link = &twlist[hash(victim->n, victim->r)];
    while (*link && *link != victim)
        link = &((*link)->cdr);

    if (!*link) {
        A(0 /* can't happen */ );
        return;
    }

    *link = victim->cdr;
    X(ifree)(victim->W);
    X(ifree)(victim);
    *pp = 0;
}
/*
 * Entry point for managing a twiddle record: SLEEPY releases the
 * reference held in *pp through twiddle_destroy(); every other
 * wakefulness value obtains a record through mktwiddle().
 */
void X(twiddle_awake)(enum wakefulness wakefulness, twid **pp,
                      const tw_instr *instr, INT n, INT r, INT m)
{
    if (wakefulness == SLEEPY)
        twiddle_destroy(pp);
    else
        mktwiddle(wakefulness, pp, instr, n, r, m);
}