Updates
This commit is contained in:
10
fftw-3.3.10/kernel/Makefile.am
Normal file
10
fftw-3.3.10/kernel/Makefile.am
Normal file
@@ -0,0 +1,10 @@
|
||||
# Add the top of the source tree to the preprocessor include path.
AM_CPPFLAGS = -I $(top_srcdir)
|
||||
# The noinst_ prefix means libkernel.la is built but never installed.
noinst_LTLIBRARIES = libkernel.la
|
||||
|
||||
# Every C source that is compiled into libkernel.la, followed by the two
# headers (cycle.h, ifftw.h) that are listed alongside them.
libkernel_la_SOURCES = align.c alloc.c assert.c awake.c buffered.c \
|
||||
cpy1d.c cpy2d-pair.c cpy2d.c ct.c debug.c extract-reim.c hash.c iabs.c \
|
||||
kalloc.c md5-1.c md5.c minmax.c ops.c pickdim.c plan.c planner.c \
|
||||
primes.c print.c problem.c rader.c scan.c solver.c solvtab.c stride.c \
|
||||
tensor.c tensor1.c tensor2.c tensor3.c tensor4.c tensor5.c tensor7.c \
|
||||
tensor8.c tensor9.c tile2d.c timer.c transpose.c trig.c twiddle.c \
|
||||
cycle.h ifftw.h
|
||||
798
fftw-3.3.10/kernel/Makefile.in
Normal file
798
fftw-3.3.10/kernel/Makefile.in
Normal file
@@ -0,0 +1,798 @@
|
||||
# Makefile.in generated by automake 1.16.3 from Makefile.am.
|
||||
# @configure_input@
|
||||
|
||||
# Copyright (C) 1994-2020 Free Software Foundation, Inc.
|
||||
|
||||
# This Makefile.in is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
# with or without modifications, as long as this notice is preserved.
|
||||
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
|
||||
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
|
||||
# PARTICULAR PURPOSE.
|
||||
|
||||
@SET_MAKE@
|
||||
|
||||
VPATH = @srcdir@
|
||||
am__is_gnu_make = { \
|
||||
if test -z '$(MAKELEVEL)'; then \
|
||||
false; \
|
||||
elif test -n '$(MAKE_HOST)'; then \
|
||||
true; \
|
||||
elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
|
||||
true; \
|
||||
else \
|
||||
false; \
|
||||
fi; \
|
||||
}
|
||||
am__make_running_with_option = \
|
||||
case $${target_option-} in \
|
||||
?) ;; \
|
||||
*) echo "am__make_running_with_option: internal error: invalid" \
|
||||
"target option '$${target_option-}' specified" >&2; \
|
||||
exit 1;; \
|
||||
esac; \
|
||||
has_opt=no; \
|
||||
sane_makeflags=$$MAKEFLAGS; \
|
||||
if $(am__is_gnu_make); then \
|
||||
sane_makeflags=$$MFLAGS; \
|
||||
else \
|
||||
case $$MAKEFLAGS in \
|
||||
*\\[\ \ ]*) \
|
||||
bs=\\; \
|
||||
sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
|
||||
| sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \
|
||||
esac; \
|
||||
fi; \
|
||||
skip_next=no; \
|
||||
strip_trailopt () \
|
||||
{ \
|
||||
flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
|
||||
}; \
|
||||
for flg in $$sane_makeflags; do \
|
||||
test $$skip_next = yes && { skip_next=no; continue; }; \
|
||||
case $$flg in \
|
||||
*=*|--*) continue;; \
|
||||
-*I) strip_trailopt 'I'; skip_next=yes;; \
|
||||
-*I?*) strip_trailopt 'I';; \
|
||||
-*O) strip_trailopt 'O'; skip_next=yes;; \
|
||||
-*O?*) strip_trailopt 'O';; \
|
||||
-*l) strip_trailopt 'l'; skip_next=yes;; \
|
||||
-*l?*) strip_trailopt 'l';; \
|
||||
-[dEDm]) skip_next=yes;; \
|
||||
-[JT]) skip_next=yes;; \
|
||||
esac; \
|
||||
case $$flg in \
|
||||
*$$target_option*) has_opt=yes; break;; \
|
||||
esac; \
|
||||
done; \
|
||||
test $$has_opt = yes
|
||||
am__make_dryrun = (target_option=n; $(am__make_running_with_option))
|
||||
am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
|
||||
pkgdatadir = $(datadir)/@PACKAGE@
|
||||
pkgincludedir = $(includedir)/@PACKAGE@
|
||||
pkglibdir = $(libdir)/@PACKAGE@
|
||||
pkglibexecdir = $(libexecdir)/@PACKAGE@
|
||||
am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
|
||||
install_sh_DATA = $(install_sh) -c -m 644
|
||||
install_sh_PROGRAM = $(install_sh) -c
|
||||
install_sh_SCRIPT = $(install_sh) -c
|
||||
INSTALL_HEADER = $(INSTALL_DATA)
|
||||
transform = $(program_transform_name)
|
||||
NORMAL_INSTALL = :
|
||||
PRE_INSTALL = :
|
||||
POST_INSTALL = :
|
||||
NORMAL_UNINSTALL = :
|
||||
PRE_UNINSTALL = :
|
||||
POST_UNINSTALL = :
|
||||
build_triplet = @build@
|
||||
host_triplet = @host@
|
||||
subdir = kernel
|
||||
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
|
||||
am__aclocal_m4_deps = $(top_srcdir)/m4/acx_mpi.m4 \
|
||||
$(top_srcdir)/m4/acx_pthread.m4 \
|
||||
$(top_srcdir)/m4/ax_cc_maxopt.m4 \
|
||||
$(top_srcdir)/m4/ax_check_compiler_flags.m4 \
|
||||
$(top_srcdir)/m4/ax_compiler_vendor.m4 \
|
||||
$(top_srcdir)/m4/ax_gcc_aligns_stack.m4 \
|
||||
$(top_srcdir)/m4/ax_gcc_version.m4 \
|
||||
$(top_srcdir)/m4/ax_openmp.m4 $(top_srcdir)/m4/libtool.m4 \
|
||||
$(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \
|
||||
$(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
|
||||
$(top_srcdir)/configure.ac
|
||||
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
|
||||
$(ACLOCAL_M4)
|
||||
DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON)
|
||||
mkinstalldirs = $(install_sh) -d
|
||||
CONFIG_HEADER = $(top_builddir)/config.h
|
||||
CONFIG_CLEAN_FILES =
|
||||
CONFIG_CLEAN_VPATH_FILES =
|
||||
LTLIBRARIES = $(noinst_LTLIBRARIES)
|
||||
libkernel_la_LIBADD =
|
||||
am_libkernel_la_OBJECTS = align.lo alloc.lo assert.lo awake.lo \
|
||||
buffered.lo cpy1d.lo cpy2d-pair.lo cpy2d.lo ct.lo debug.lo \
|
||||
extract-reim.lo hash.lo iabs.lo kalloc.lo md5-1.lo md5.lo \
|
||||
minmax.lo ops.lo pickdim.lo plan.lo planner.lo primes.lo \
|
||||
print.lo problem.lo rader.lo scan.lo solver.lo solvtab.lo \
|
||||
stride.lo tensor.lo tensor1.lo tensor2.lo tensor3.lo \
|
||||
tensor4.lo tensor5.lo tensor7.lo tensor8.lo tensor9.lo \
|
||||
tile2d.lo timer.lo transpose.lo trig.lo twiddle.lo
|
||||
libkernel_la_OBJECTS = $(am_libkernel_la_OBJECTS)
|
||||
AM_V_lt = $(am__v_lt_@AM_V@)
|
||||
am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
|
||||
am__v_lt_0 = --silent
|
||||
am__v_lt_1 =
|
||||
AM_V_P = $(am__v_P_@AM_V@)
|
||||
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
|
||||
am__v_P_0 = false
|
||||
am__v_P_1 = :
|
||||
AM_V_GEN = $(am__v_GEN_@AM_V@)
|
||||
am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
|
||||
am__v_GEN_0 = @echo " GEN " $@;
|
||||
am__v_GEN_1 =
|
||||
AM_V_at = $(am__v_at_@AM_V@)
|
||||
am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
|
||||
am__v_at_0 = @
|
||||
am__v_at_1 =
|
||||
DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
|
||||
depcomp = $(SHELL) $(top_srcdir)/depcomp
|
||||
am__maybe_remake_depfiles = depfiles
|
||||
am__depfiles_remade = ./$(DEPDIR)/align.Plo ./$(DEPDIR)/alloc.Plo \
|
||||
./$(DEPDIR)/assert.Plo ./$(DEPDIR)/awake.Plo \
|
||||
./$(DEPDIR)/buffered.Plo ./$(DEPDIR)/cpy1d.Plo \
|
||||
./$(DEPDIR)/cpy2d-pair.Plo ./$(DEPDIR)/cpy2d.Plo \
|
||||
./$(DEPDIR)/ct.Plo ./$(DEPDIR)/debug.Plo \
|
||||
./$(DEPDIR)/extract-reim.Plo ./$(DEPDIR)/hash.Plo \
|
||||
./$(DEPDIR)/iabs.Plo ./$(DEPDIR)/kalloc.Plo \
|
||||
./$(DEPDIR)/md5-1.Plo ./$(DEPDIR)/md5.Plo \
|
||||
./$(DEPDIR)/minmax.Plo ./$(DEPDIR)/ops.Plo \
|
||||
./$(DEPDIR)/pickdim.Plo ./$(DEPDIR)/plan.Plo \
|
||||
./$(DEPDIR)/planner.Plo ./$(DEPDIR)/primes.Plo \
|
||||
./$(DEPDIR)/print.Plo ./$(DEPDIR)/problem.Plo \
|
||||
./$(DEPDIR)/rader.Plo ./$(DEPDIR)/scan.Plo \
|
||||
./$(DEPDIR)/solver.Plo ./$(DEPDIR)/solvtab.Plo \
|
||||
./$(DEPDIR)/stride.Plo ./$(DEPDIR)/tensor.Plo \
|
||||
./$(DEPDIR)/tensor1.Plo ./$(DEPDIR)/tensor2.Plo \
|
||||
./$(DEPDIR)/tensor3.Plo ./$(DEPDIR)/tensor4.Plo \
|
||||
./$(DEPDIR)/tensor5.Plo ./$(DEPDIR)/tensor7.Plo \
|
||||
./$(DEPDIR)/tensor8.Plo ./$(DEPDIR)/tensor9.Plo \
|
||||
./$(DEPDIR)/tile2d.Plo ./$(DEPDIR)/timer.Plo \
|
||||
./$(DEPDIR)/transpose.Plo ./$(DEPDIR)/trig.Plo \
|
||||
./$(DEPDIR)/twiddle.Plo
|
||||
am__mv = mv -f
|
||||
COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
|
||||
$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
|
||||
LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
|
||||
$(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \
|
||||
$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
|
||||
$(AM_CFLAGS) $(CFLAGS)
|
||||
AM_V_CC = $(am__v_CC_@AM_V@)
|
||||
am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
|
||||
am__v_CC_0 = @echo " CC " $@;
|
||||
am__v_CC_1 =
|
||||
CCLD = $(CC)
|
||||
LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
|
||||
$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
|
||||
$(AM_LDFLAGS) $(LDFLAGS) -o $@
|
||||
AM_V_CCLD = $(am__v_CCLD_@AM_V@)
|
||||
am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
|
||||
am__v_CCLD_0 = @echo " CCLD " $@;
|
||||
am__v_CCLD_1 =
|
||||
SOURCES = $(libkernel_la_SOURCES)
|
||||
DIST_SOURCES = $(libkernel_la_SOURCES)
|
||||
am__can_run_installinfo = \
|
||||
case $$AM_UPDATE_INFO_DIR in \
|
||||
n|no|NO) false;; \
|
||||
*) (install-info --version) >/dev/null 2>&1;; \
|
||||
esac
|
||||
am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
|
||||
# Read a list of newline-separated strings from the standard input,
|
||||
# and print each of them once, without duplicates. Input order is
|
||||
# *not* preserved.
|
||||
am__uniquify_input = $(AWK) '\
|
||||
BEGIN { nonempty = 0; } \
|
||||
{ items[$$0] = 1; nonempty = 1; } \
|
||||
END { if (nonempty) { for (i in items) print i; }; } \
|
||||
'
|
||||
# Make sure the list of sources is unique. This is necessary because,
|
||||
# e.g., the same source file might be shared among _SOURCES variables
|
||||
# for different programs/libraries.
|
||||
am__define_uniq_tagged_files = \
|
||||
list='$(am__tagged_files)'; \
|
||||
unique=`for i in $$list; do \
|
||||
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
|
||||
done | $(am__uniquify_input)`
|
||||
ETAGS = etags
|
||||
CTAGS = ctags
|
||||
am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/depcomp
|
||||
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
|
||||
ACLOCAL = @ACLOCAL@
|
||||
ALLOCA = @ALLOCA@
|
||||
ALTIVEC_CFLAGS = @ALTIVEC_CFLAGS@
|
||||
AMTAR = @AMTAR@
|
||||
AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
|
||||
AR = @AR@
|
||||
AS = @AS@
|
||||
AUTOCONF = @AUTOCONF@
|
||||
AUTOHEADER = @AUTOHEADER@
|
||||
AUTOMAKE = @AUTOMAKE@
|
||||
AVX2_CFLAGS = @AVX2_CFLAGS@
|
||||
AVX512_CFLAGS = @AVX512_CFLAGS@
|
||||
AVX_128_FMA_CFLAGS = @AVX_128_FMA_CFLAGS@
|
||||
AVX_CFLAGS = @AVX_CFLAGS@
|
||||
AWK = @AWK@
|
||||
CC = @CC@
|
||||
CCDEPMODE = @CCDEPMODE@
|
||||
CFLAGS = @CFLAGS@
|
||||
CHECK_PL_OPTS = @CHECK_PL_OPTS@
|
||||
CPP = @CPP@
|
||||
CPPFLAGS = @CPPFLAGS@
|
||||
CYGPATH_W = @CYGPATH_W@
|
||||
C_FFTW_R2R_KIND = @C_FFTW_R2R_KIND@
|
||||
C_MPI_FINT = @C_MPI_FINT@
|
||||
DEFS = @DEFS@
|
||||
DEPDIR = @DEPDIR@
|
||||
DLLTOOL = @DLLTOOL@
|
||||
DSYMUTIL = @DSYMUTIL@
|
||||
DUMPBIN = @DUMPBIN@
|
||||
ECHO_C = @ECHO_C@
|
||||
ECHO_N = @ECHO_N@
|
||||
ECHO_T = @ECHO_T@
|
||||
EGREP = @EGREP@
|
||||
EXEEXT = @EXEEXT@
|
||||
F77 = @F77@
|
||||
FFLAGS = @FFLAGS@
|
||||
FGREP = @FGREP@
|
||||
FLIBS = @FLIBS@
|
||||
GREP = @GREP@
|
||||
INDENT = @INDENT@
|
||||
INSTALL = @INSTALL@
|
||||
INSTALL_DATA = @INSTALL_DATA@
|
||||
INSTALL_PROGRAM = @INSTALL_PROGRAM@
|
||||
INSTALL_SCRIPT = @INSTALL_SCRIPT@
|
||||
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
|
||||
KCVI_CFLAGS = @KCVI_CFLAGS@
|
||||
LD = @LD@
|
||||
LDFLAGS = @LDFLAGS@
|
||||
LIBOBJS = @LIBOBJS@
|
||||
LIBQUADMATH = @LIBQUADMATH@
|
||||
LIBS = @LIBS@
|
||||
LIBTOOL = @LIBTOOL@
|
||||
LIPO = @LIPO@
|
||||
LN_S = @LN_S@
|
||||
LTLIBOBJS = @LTLIBOBJS@
|
||||
LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@
|
||||
MAINT = @MAINT@
|
||||
MAKEINFO = @MAKEINFO@
|
||||
MANIFEST_TOOL = @MANIFEST_TOOL@
|
||||
MKDIR_P = @MKDIR_P@
|
||||
MPICC = @MPICC@
|
||||
MPILIBS = @MPILIBS@
|
||||
MPIRUN = @MPIRUN@
|
||||
NEON_CFLAGS = @NEON_CFLAGS@
|
||||
NM = @NM@
|
||||
NMEDIT = @NMEDIT@
|
||||
OBJDUMP = @OBJDUMP@
|
||||
OBJEXT = @OBJEXT@
|
||||
OCAMLBUILD = @OCAMLBUILD@
|
||||
OPENMP_CFLAGS = @OPENMP_CFLAGS@
|
||||
OTOOL = @OTOOL@
|
||||
OTOOL64 = @OTOOL64@
|
||||
PACKAGE = @PACKAGE@
|
||||
PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
|
||||
PACKAGE_NAME = @PACKAGE_NAME@
|
||||
PACKAGE_STRING = @PACKAGE_STRING@
|
||||
PACKAGE_TARNAME = @PACKAGE_TARNAME@
|
||||
PACKAGE_URL = @PACKAGE_URL@
|
||||
PACKAGE_VERSION = @PACKAGE_VERSION@
|
||||
PATH_SEPARATOR = @PATH_SEPARATOR@
|
||||
POW_LIB = @POW_LIB@
|
||||
PRECISION = @PRECISION@
|
||||
PREC_SUFFIX = @PREC_SUFFIX@
|
||||
PTHREAD_CC = @PTHREAD_CC@
|
||||
PTHREAD_CFLAGS = @PTHREAD_CFLAGS@
|
||||
PTHREAD_LIBS = @PTHREAD_LIBS@
|
||||
RANLIB = @RANLIB@
|
||||
SED = @SED@
|
||||
SET_MAKE = @SET_MAKE@
|
||||
SHARED_VERSION_INFO = @SHARED_VERSION_INFO@
|
||||
SHELL = @SHELL@
|
||||
SSE2_CFLAGS = @SSE2_CFLAGS@
|
||||
STACK_ALIGN_CFLAGS = @STACK_ALIGN_CFLAGS@
|
||||
STRIP = @STRIP@
|
||||
THREADLIBS = @THREADLIBS@
|
||||
VERSION = @VERSION@
|
||||
VSX_CFLAGS = @VSX_CFLAGS@
|
||||
abs_builddir = @abs_builddir@
|
||||
abs_srcdir = @abs_srcdir@
|
||||
abs_top_builddir = @abs_top_builddir@
|
||||
abs_top_srcdir = @abs_top_srcdir@
|
||||
ac_ct_AR = @ac_ct_AR@
|
||||
ac_ct_CC = @ac_ct_CC@
|
||||
ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
|
||||
ac_ct_F77 = @ac_ct_F77@
|
||||
acx_pthread_config = @acx_pthread_config@
|
||||
am__include = @am__include@
|
||||
am__leading_dot = @am__leading_dot@
|
||||
am__quote = @am__quote@
|
||||
am__tar = @am__tar@
|
||||
am__untar = @am__untar@
|
||||
bindir = @bindir@
|
||||
build = @build@
|
||||
build_alias = @build_alias@
|
||||
build_cpu = @build_cpu@
|
||||
build_os = @build_os@
|
||||
build_vendor = @build_vendor@
|
||||
builddir = @builddir@
|
||||
datadir = @datadir@
|
||||
datarootdir = @datarootdir@
|
||||
docdir = @docdir@
|
||||
dvidir = @dvidir@
|
||||
exec_prefix = @exec_prefix@
|
||||
host = @host@
|
||||
host_alias = @host_alias@
|
||||
host_cpu = @host_cpu@
|
||||
host_os = @host_os@
|
||||
host_vendor = @host_vendor@
|
||||
htmldir = @htmldir@
|
||||
includedir = @includedir@
|
||||
infodir = @infodir@
|
||||
install_sh = @install_sh@
|
||||
libdir = @libdir@
|
||||
libexecdir = @libexecdir@
|
||||
localedir = @localedir@
|
||||
localstatedir = @localstatedir@
|
||||
mandir = @mandir@
|
||||
mkdir_p = @mkdir_p@
|
||||
oldincludedir = @oldincludedir@
|
||||
pdfdir = @pdfdir@
|
||||
prefix = @prefix@
|
||||
program_transform_name = @program_transform_name@
|
||||
psdir = @psdir@
|
||||
runstatedir = @runstatedir@
|
||||
sbindir = @sbindir@
|
||||
sharedstatedir = @sharedstatedir@
|
||||
srcdir = @srcdir@
|
||||
sysconfdir = @sysconfdir@
|
||||
target_alias = @target_alias@
|
||||
top_build_prefix = @top_build_prefix@
|
||||
top_builddir = @top_builddir@
|
||||
top_srcdir = @top_srcdir@
|
||||
AM_CPPFLAGS = -I $(top_srcdir)
|
||||
noinst_LTLIBRARIES = libkernel.la
|
||||
libkernel_la_SOURCES = align.c alloc.c assert.c awake.c buffered.c \
|
||||
cpy1d.c cpy2d-pair.c cpy2d.c ct.c debug.c extract-reim.c hash.c iabs.c \
|
||||
kalloc.c md5-1.c md5.c minmax.c ops.c pickdim.c plan.c planner.c \
|
||||
primes.c print.c problem.c rader.c scan.c solver.c solvtab.c stride.c \
|
||||
tensor.c tensor1.c tensor2.c tensor3.c tensor4.c tensor5.c tensor7.c \
|
||||
tensor8.c tensor9.c tile2d.c timer.c transpose.c trig.c twiddle.c \
|
||||
cycle.h ifftw.h
|
||||
|
||||
all: all-am
|
||||
|
||||
.SUFFIXES:
|
||||
.SUFFIXES: .c .lo .o .obj
|
||||
$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
|
||||
@for dep in $?; do \
|
||||
case '$(am__configure_deps)' in \
|
||||
*$$dep*) \
|
||||
( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
|
||||
&& { if test -f $@; then exit 0; else break; fi; }; \
|
||||
exit 1;; \
|
||||
esac; \
|
||||
done; \
|
||||
echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu kernel/Makefile'; \
|
||||
$(am__cd) $(top_srcdir) && \
|
||||
$(AUTOMAKE) --gnu kernel/Makefile
|
||||
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
|
||||
@case '$?' in \
|
||||
*config.status*) \
|
||||
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
|
||||
*) \
|
||||
echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \
|
||||
cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \
|
||||
esac;
|
||||
|
||||
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
|
||||
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
|
||||
|
||||
$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
|
||||
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
|
||||
$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
|
||||
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
|
||||
$(am__aclocal_m4_deps):
|
||||
|
||||
clean-noinstLTLIBRARIES:
|
||||
-test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES)
|
||||
@list='$(noinst_LTLIBRARIES)'; \
|
||||
locs=`for p in $$list; do echo $$p; done | \
|
||||
sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \
|
||||
sort -u`; \
|
||||
test -z "$$locs" || { \
|
||||
echo rm -f $${locs}; \
|
||||
rm -f $${locs}; \
|
||||
}
|
||||
|
||||
libkernel.la: $(libkernel_la_OBJECTS) $(libkernel_la_DEPENDENCIES) $(EXTRA_libkernel_la_DEPENDENCIES)
|
||||
$(AM_V_CCLD)$(LINK) $(libkernel_la_OBJECTS) $(libkernel_la_LIBADD) $(LIBS)
|
||||
|
||||
mostlyclean-compile:
|
||||
-rm -f *.$(OBJEXT)
|
||||
|
||||
distclean-compile:
|
||||
-rm -f *.tab.c
|
||||
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/align.Plo@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/alloc.Plo@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/assert.Plo@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/awake.Plo@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/buffered.Plo@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cpy1d.Plo@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cpy2d-pair.Plo@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cpy2d.Plo@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ct.Plo@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/debug.Plo@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/extract-reim.Plo@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hash.Plo@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/iabs.Plo@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/kalloc.Plo@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/md5-1.Plo@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/md5.Plo@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/minmax.Plo@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ops.Plo@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pickdim.Plo@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/plan.Plo@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/planner.Plo@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/primes.Plo@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/print.Plo@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/problem.Plo@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rader.Plo@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/scan.Plo@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/solver.Plo@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/solvtab.Plo@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/stride.Plo@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tensor.Plo@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tensor1.Plo@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tensor2.Plo@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tensor3.Plo@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tensor4.Plo@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tensor5.Plo@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tensor7.Plo@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tensor8.Plo@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tensor9.Plo@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tile2d.Plo@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/timer.Plo@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/transpose.Plo@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/trig.Plo@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/twiddle.Plo@am__quote@ # am--include-marker
|
||||
|
||||
$(am__depfiles_remade):
|
||||
@$(MKDIR_P) $(@D)
|
||||
@echo '# dummy' >$@-t && $(am__mv) $@-t $@
|
||||
|
||||
am--depfiles: $(am__depfiles_remade)
|
||||
|
||||
.c.o:
|
||||
@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
|
||||
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
|
||||
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
|
||||
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
|
||||
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $<
|
||||
|
||||
.c.obj:
|
||||
@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
|
||||
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
|
||||
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
|
||||
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
|
||||
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
|
||||
|
||||
.c.lo:
|
||||
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
|
||||
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
|
||||
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
|
||||
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
|
||||
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $<
|
||||
|
||||
mostlyclean-libtool:
|
||||
-rm -f *.lo
|
||||
|
||||
clean-libtool:
|
||||
-rm -rf .libs _libs
|
||||
|
||||
ID: $(am__tagged_files)
|
||||
$(am__define_uniq_tagged_files); mkid -fID $$unique
|
||||
tags: tags-am
|
||||
TAGS: tags
|
||||
|
||||
tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
|
||||
set x; \
|
||||
here=`pwd`; \
|
||||
$(am__define_uniq_tagged_files); \
|
||||
shift; \
|
||||
if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
|
||||
test -n "$$unique" || unique=$$empty_fix; \
|
||||
if test $$# -gt 0; then \
|
||||
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
|
||||
"$$@" $$unique; \
|
||||
else \
|
||||
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
|
||||
$$unique; \
|
||||
fi; \
|
||||
fi
|
||||
ctags: ctags-am
|
||||
|
||||
CTAGS: ctags
|
||||
ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
|
||||
$(am__define_uniq_tagged_files); \
|
||||
test -z "$(CTAGS_ARGS)$$unique" \
|
||||
|| $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
|
||||
$$unique
|
||||
|
||||
GTAGS:
|
||||
here=`$(am__cd) $(top_builddir) && pwd` \
|
||||
&& $(am__cd) $(top_srcdir) \
|
||||
&& gtags -i $(GTAGS_ARGS) "$$here"
|
||||
cscopelist: cscopelist-am
|
||||
|
||||
cscopelist-am: $(am__tagged_files)
|
||||
list='$(am__tagged_files)'; \
|
||||
case "$(srcdir)" in \
|
||||
[\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
|
||||
*) sdir=$(subdir)/$(srcdir) ;; \
|
||||
esac; \
|
||||
for i in $$list; do \
|
||||
if test -f "$$i"; then \
|
||||
echo "$(subdir)/$$i"; \
|
||||
else \
|
||||
echo "$$sdir/$$i"; \
|
||||
fi; \
|
||||
done >> $(top_builddir)/cscope.files
|
||||
|
||||
distclean-tags:
|
||||
-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
|
||||
|
||||
distdir: $(BUILT_SOURCES)
|
||||
$(MAKE) $(AM_MAKEFLAGS) distdir-am
|
||||
|
||||
distdir-am: $(DISTFILES)
|
||||
@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
|
||||
topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
|
||||
list='$(DISTFILES)'; \
|
||||
dist_files=`for file in $$list; do echo $$file; done | \
|
||||
sed -e "s|^$$srcdirstrip/||;t" \
|
||||
-e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
|
||||
case $$dist_files in \
|
||||
*/*) $(MKDIR_P) `echo "$$dist_files" | \
|
||||
sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
|
||||
sort -u` ;; \
|
||||
esac; \
|
||||
for file in $$dist_files; do \
|
||||
if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
|
||||
if test -d $$d/$$file; then \
|
||||
dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
|
||||
if test -d "$(distdir)/$$file"; then \
|
||||
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
|
||||
fi; \
|
||||
if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
|
||||
cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
|
||||
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
|
||||
fi; \
|
||||
cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
|
||||
else \
|
||||
test -f "$(distdir)/$$file" \
|
||||
|| cp -p $$d/$$file "$(distdir)/$$file" \
|
||||
|| exit 1; \
|
||||
fi; \
|
||||
done
|
||||
check-am: all-am
|
||||
check: check-am
|
||||
all-am: Makefile $(LTLIBRARIES)
|
||||
installdirs:
|
||||
install: install-am
|
||||
install-exec: install-exec-am
|
||||
install-data: install-data-am
|
||||
uninstall: uninstall-am
|
||||
|
||||
install-am: all-am
|
||||
@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
|
||||
|
||||
installcheck: installcheck-am
|
||||
install-strip:
|
||||
if test -z '$(STRIP)'; then \
|
||||
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
|
||||
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
|
||||
install; \
|
||||
else \
|
||||
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
|
||||
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
|
||||
"INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
|
||||
fi
|
||||
mostlyclean-generic:
|
||||
|
||||
clean-generic:
|
||||
|
||||
distclean-generic:
|
||||
-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
|
||||
-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
|
||||
|
||||
maintainer-clean-generic:
|
||||
@echo "This command is intended for maintainers to use"
|
||||
@echo "it deletes files that may require special tools to rebuild."
|
||||
clean: clean-am
|
||||
|
||||
clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \
|
||||
mostlyclean-am
|
||||
|
||||
distclean: distclean-am
|
||||
-rm -f ./$(DEPDIR)/align.Plo
|
||||
-rm -f ./$(DEPDIR)/alloc.Plo
|
||||
-rm -f ./$(DEPDIR)/assert.Plo
|
||||
-rm -f ./$(DEPDIR)/awake.Plo
|
||||
-rm -f ./$(DEPDIR)/buffered.Plo
|
||||
-rm -f ./$(DEPDIR)/cpy1d.Plo
|
||||
-rm -f ./$(DEPDIR)/cpy2d-pair.Plo
|
||||
-rm -f ./$(DEPDIR)/cpy2d.Plo
|
||||
-rm -f ./$(DEPDIR)/ct.Plo
|
||||
-rm -f ./$(DEPDIR)/debug.Plo
|
||||
-rm -f ./$(DEPDIR)/extract-reim.Plo
|
||||
-rm -f ./$(DEPDIR)/hash.Plo
|
||||
-rm -f ./$(DEPDIR)/iabs.Plo
|
||||
-rm -f ./$(DEPDIR)/kalloc.Plo
|
||||
-rm -f ./$(DEPDIR)/md5-1.Plo
|
||||
-rm -f ./$(DEPDIR)/md5.Plo
|
||||
-rm -f ./$(DEPDIR)/minmax.Plo
|
||||
-rm -f ./$(DEPDIR)/ops.Plo
|
||||
-rm -f ./$(DEPDIR)/pickdim.Plo
|
||||
-rm -f ./$(DEPDIR)/plan.Plo
|
||||
-rm -f ./$(DEPDIR)/planner.Plo
|
||||
-rm -f ./$(DEPDIR)/primes.Plo
|
||||
-rm -f ./$(DEPDIR)/print.Plo
|
||||
-rm -f ./$(DEPDIR)/problem.Plo
|
||||
-rm -f ./$(DEPDIR)/rader.Plo
|
||||
-rm -f ./$(DEPDIR)/scan.Plo
|
||||
-rm -f ./$(DEPDIR)/solver.Plo
|
||||
-rm -f ./$(DEPDIR)/solvtab.Plo
|
||||
-rm -f ./$(DEPDIR)/stride.Plo
|
||||
-rm -f ./$(DEPDIR)/tensor.Plo
|
||||
-rm -f ./$(DEPDIR)/tensor1.Plo
|
||||
-rm -f ./$(DEPDIR)/tensor2.Plo
|
||||
-rm -f ./$(DEPDIR)/tensor3.Plo
|
||||
-rm -f ./$(DEPDIR)/tensor4.Plo
|
||||
-rm -f ./$(DEPDIR)/tensor5.Plo
|
||||
-rm -f ./$(DEPDIR)/tensor7.Plo
|
||||
-rm -f ./$(DEPDIR)/tensor8.Plo
|
||||
-rm -f ./$(DEPDIR)/tensor9.Plo
|
||||
-rm -f ./$(DEPDIR)/tile2d.Plo
|
||||
-rm -f ./$(DEPDIR)/timer.Plo
|
||||
-rm -f ./$(DEPDIR)/transpose.Plo
|
||||
-rm -f ./$(DEPDIR)/trig.Plo
|
||||
-rm -f ./$(DEPDIR)/twiddle.Plo
|
||||
-rm -f Makefile
|
||||
distclean-am: clean-am distclean-compile distclean-generic \
|
||||
distclean-tags
|
||||
|
||||
dvi: dvi-am
|
||||
|
||||
dvi-am:
|
||||
|
||||
html: html-am
|
||||
|
||||
html-am:
|
||||
|
||||
info: info-am
|
||||
|
||||
info-am:
|
||||
|
||||
install-data-am:
|
||||
|
||||
install-dvi: install-dvi-am
|
||||
|
||||
install-dvi-am:
|
||||
|
||||
install-exec-am:
|
||||
|
||||
install-html: install-html-am
|
||||
|
||||
install-html-am:
|
||||
|
||||
install-info: install-info-am
|
||||
|
||||
install-info-am:
|
||||
|
||||
install-man:
|
||||
|
||||
install-pdf: install-pdf-am
|
||||
|
||||
install-pdf-am:
|
||||
|
||||
install-ps: install-ps-am
|
||||
|
||||
install-ps-am:
|
||||
|
||||
installcheck-am:
|
||||
|
||||
maintainer-clean: maintainer-clean-am
|
||||
-rm -f ./$(DEPDIR)/align.Plo
|
||||
-rm -f ./$(DEPDIR)/alloc.Plo
|
||||
-rm -f ./$(DEPDIR)/assert.Plo
|
||||
-rm -f ./$(DEPDIR)/awake.Plo
|
||||
-rm -f ./$(DEPDIR)/buffered.Plo
|
||||
-rm -f ./$(DEPDIR)/cpy1d.Plo
|
||||
-rm -f ./$(DEPDIR)/cpy2d-pair.Plo
|
||||
-rm -f ./$(DEPDIR)/cpy2d.Plo
|
||||
-rm -f ./$(DEPDIR)/ct.Plo
|
||||
-rm -f ./$(DEPDIR)/debug.Plo
|
||||
-rm -f ./$(DEPDIR)/extract-reim.Plo
|
||||
-rm -f ./$(DEPDIR)/hash.Plo
|
||||
-rm -f ./$(DEPDIR)/iabs.Plo
|
||||
-rm -f ./$(DEPDIR)/kalloc.Plo
|
||||
-rm -f ./$(DEPDIR)/md5-1.Plo
|
||||
-rm -f ./$(DEPDIR)/md5.Plo
|
||||
-rm -f ./$(DEPDIR)/minmax.Plo
|
||||
-rm -f ./$(DEPDIR)/ops.Plo
|
||||
-rm -f ./$(DEPDIR)/pickdim.Plo
|
||||
-rm -f ./$(DEPDIR)/plan.Plo
|
||||
-rm -f ./$(DEPDIR)/planner.Plo
|
||||
-rm -f ./$(DEPDIR)/primes.Plo
|
||||
-rm -f ./$(DEPDIR)/print.Plo
|
||||
-rm -f ./$(DEPDIR)/problem.Plo
|
||||
-rm -f ./$(DEPDIR)/rader.Plo
|
||||
-rm -f ./$(DEPDIR)/scan.Plo
|
||||
-rm -f ./$(DEPDIR)/solver.Plo
|
||||
-rm -f ./$(DEPDIR)/solvtab.Plo
|
||||
-rm -f ./$(DEPDIR)/stride.Plo
|
||||
-rm -f ./$(DEPDIR)/tensor.Plo
|
||||
-rm -f ./$(DEPDIR)/tensor1.Plo
|
||||
-rm -f ./$(DEPDIR)/tensor2.Plo
|
||||
-rm -f ./$(DEPDIR)/tensor3.Plo
|
||||
-rm -f ./$(DEPDIR)/tensor4.Plo
|
||||
-rm -f ./$(DEPDIR)/tensor5.Plo
|
||||
-rm -f ./$(DEPDIR)/tensor7.Plo
|
||||
-rm -f ./$(DEPDIR)/tensor8.Plo
|
||||
-rm -f ./$(DEPDIR)/tensor9.Plo
|
||||
-rm -f ./$(DEPDIR)/tile2d.Plo
|
||||
-rm -f ./$(DEPDIR)/timer.Plo
|
||||
-rm -f ./$(DEPDIR)/transpose.Plo
|
||||
-rm -f ./$(DEPDIR)/trig.Plo
|
||||
-rm -f ./$(DEPDIR)/twiddle.Plo
|
||||
-rm -f Makefile
|
||||
maintainer-clean-am: distclean-am maintainer-clean-generic
|
||||
|
||||
mostlyclean: mostlyclean-am
|
||||
|
||||
mostlyclean-am: mostlyclean-compile mostlyclean-generic \
|
||||
mostlyclean-libtool
|
||||
|
||||
pdf: pdf-am
|
||||
|
||||
pdf-am:
|
||||
|
||||
ps: ps-am
|
||||
|
||||
ps-am:
|
||||
|
||||
uninstall-am:
|
||||
|
||||
.MAKE: install-am install-strip
|
||||
|
||||
.PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-am clean \
|
||||
clean-generic clean-libtool clean-noinstLTLIBRARIES \
|
||||
cscopelist-am ctags ctags-am distclean distclean-compile \
|
||||
distclean-generic distclean-libtool distclean-tags distdir dvi \
|
||||
dvi-am html html-am info info-am install install-am \
|
||||
install-data install-data-am install-dvi install-dvi-am \
|
||||
install-exec install-exec-am install-html install-html-am \
|
||||
install-info install-info-am install-man install-pdf \
|
||||
install-pdf-am install-ps install-ps-am install-strip \
|
||||
installcheck installcheck-am installdirs maintainer-clean \
|
||||
maintainer-clean-generic mostlyclean mostlyclean-compile \
|
||||
mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
|
||||
tags tags-am uninstall uninstall-am
|
||||
|
||||
.PRECIOUS: Makefile
|
||||
|
||||
|
||||
# Tell versions [3.59,3.63) of GNU make to not export all variables.
|
||||
# Otherwise a system limit (for SysV at least) may be exceeded.
|
||||
.NOEXPORT:
|
||||
41
fftw-3.3.10/kernel/align.c
Normal file
41
fftw-3.3.10/kernel/align.c
Normal file
@@ -0,0 +1,41 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include "kernel/ifftw.h"
|
||||
|
||||
#if HAVE_SIMD
|
||||
# define ALGN 16
|
||||
#else
|
||||
/* disable the alignment machinery, because it will break,
|
||||
e.g., if sizeof(R) == 12 (as in long-double/x86) */
|
||||
# define ALGN 0
|
||||
#endif
|
||||
|
||||
/* NONPORTABLE */
|
||||
int X(ialignment_of)(R *p)
|
||||
{
|
||||
#if ALGN == 0
|
||||
UNUSED(p);
|
||||
return 0;
|
||||
#else
|
||||
return (int)(((uintptr_t) p) % ALGN);
|
||||
#endif
|
||||
}
|
||||
47
fftw-3.3.10/kernel/alloc.c
Normal file
47
fftw-3.3.10/kernel/alloc.c
Normal file
@@ -0,0 +1,47 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
#include "kernel/ifftw.h"
|
||||
|
||||
void *X(malloc_plain)(size_t n)
|
||||
{
|
||||
void *p;
|
||||
if (n == 0)
|
||||
n = 1;
|
||||
p = X(kernel_malloc)(n);
|
||||
CK(p);
|
||||
|
||||
#ifdef MIN_ALIGNMENT
|
||||
A((((uintptr_t)p) % MIN_ALIGNMENT) == 0);
|
||||
#endif
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
void X(ifree)(void *p)
|
||||
{
|
||||
X(kernel_free)(p);
|
||||
}
|
||||
|
||||
void X(ifree0)(void *p)
|
||||
{
|
||||
/* common pattern */
|
||||
if (p) X(ifree)(p);
|
||||
}
|
||||
34
fftw-3.3.10/kernel/assert.c
Normal file
34
fftw-3.3.10/kernel/assert.c
Normal file
@@ -0,0 +1,34 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
#include "kernel/ifftw.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
void X(assertion_failed)(const char *s, int line, const char *file)
|
||||
{
|
||||
fflush(stdout);
|
||||
fprintf(stderr, "fftw: %s:%d: assertion failed: %s\n", file, line, s);
|
||||
#ifdef HAVE_ABORT
|
||||
abort();
|
||||
#else
|
||||
exit(EXIT_FAILURE);
|
||||
#endif
|
||||
}
|
||||
29
fftw-3.3.10/kernel/awake.c
Normal file
29
fftw-3.3.10/kernel/awake.c
Normal file
@@ -0,0 +1,29 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include "kernel/ifftw.h"
|
||||
|
||||
/* An awake hook that ignores both of its arguments and does nothing. */
void X(null_awake)(plan *ego, enum wakefulness wakefulness)
{
     /* nothing to do; only silence unused-parameter warnings */
     UNUSED(wakefulness);
     UNUSED(ego);
}
|
||||
82
fftw-3.3.10/kernel/buffered.c
Normal file
82
fftw-3.3.10/kernel/buffered.c
Normal file
@@ -0,0 +1,82 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
/* routines shared by the various buffered solvers */
|
||||
|
||||
#include "kernel/ifftw.h"
|
||||
|
||||
#define DEFAULT_MAXNBUF ((INT)256)
|
||||
|
||||
/* approx. 512KB of buffers for complex data */
|
||||
#define MAXBUFSZ (256 * 1024 / (INT)(sizeof(R)))
|
||||
|
||||
INT X(nbuf)(INT n, INT vl, INT maxnbuf)
|
||||
{
|
||||
INT i, nbuf, lb;
|
||||
|
||||
if (!maxnbuf)
|
||||
maxnbuf = DEFAULT_MAXNBUF;
|
||||
|
||||
nbuf = X(imin)(maxnbuf,
|
||||
X(imin)(vl, X(imax)((INT)1, MAXBUFSZ / n)));
|
||||
|
||||
/*
|
||||
* Look for a buffer number (not too small) that divides the
|
||||
* vector length, in order that we only need one child plan:
|
||||
*/
|
||||
lb = X(imax)(1, nbuf / 4);
|
||||
for (i = nbuf; i >= lb; --i)
|
||||
if (vl % i == 0)
|
||||
return i;
|
||||
|
||||
/* whatever... */
|
||||
return nbuf;
|
||||
}
|
||||
|
||||
#define SKEW 6 /* need to be even for SIMD */
|
||||
#define SKEWMOD 8
|
||||
|
||||
INT X(bufdist)(INT n, INT vl)
|
||||
{
|
||||
if (vl == 1)
|
||||
return n;
|
||||
else
|
||||
/* return smallest X such that X >= N and X == SKEW (mod SKEWMOD) */
|
||||
return n + X(modulo)(SKEW - n, SKEWMOD);
|
||||
}
|
||||
|
||||
/* Nonzero if n exceeds MAXBUFSZ. */
int X(toobig)(INT n)
{
     return MAXBUFSZ < n;
}
|
||||
|
||||
/* TRUE if there exists i < which such that maxnbuf[i] and
|
||||
maxnbuf[which] yield the same value, in which case we canonicalize
|
||||
on the minimum value */
|
||||
int X(nbuf_redundant)(INT n, INT vl, size_t which,
|
||||
const INT *maxnbuf, size_t nmaxnbuf)
|
||||
{
|
||||
size_t i;
|
||||
(void)nmaxnbuf; /* UNUSED */
|
||||
for (i = 0; i < which; ++i)
|
||||
if (X(nbuf)(n, vl, maxnbuf[i]) == X(nbuf)(n, vl, maxnbuf[which]))
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
70
fftw-3.3.10/kernel/cpy1d.c
Normal file
70
fftw-3.3.10/kernel/cpy1d.c
Normal file
@@ -0,0 +1,70 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
/* out of place 1D copy routine */
|
||||
#include "kernel/ifftw.h"
|
||||
|
||||
/*
 * Out-of-place 1D copy.  For each of n0 positions, vl consecutive reals
 * are copied from I (position stride is0) to O (position stride os0).
 *
 * vl == 1, 2 and 4 have dedicated loops.  When the count is even and
 * the strides equal the current width (1 or 2), the data is re-viewed
 * as half as many items of twice the width and handled by the next
 * wider case.  Any other vl uses the generic double loop.
 */
void X(cpy1d)(R *I, R *O, INT n0, INT is0, INT os0, INT vl)
{
     INT i0, v;

     A(I != O);
     switch (vl) {
         case 1:
              if (!(n0 & 1) && is0 == 1 && os0 == 1) {
                   /* view as n0/2 pairs */
                   n0 /= 2; is0 = 2; os0 = 2;
              } else {
                   while (n0 > 0) {
                        *O = *I;
                        I += is0;
                        O += os0;
                        --n0;
                   }
                   break;
              }
              /* fall through */
         case 2:
              if (!(n0 & 1) && is0 == 2 && os0 == 2) {
                   /* view as n0/2 quadruples */
                   n0 /= 2; is0 = 4; os0 = 4;
              } else {
                   while (n0 > 0) {
                        /* read everything before writing anything */
                        R a = I[0];
                        R b = I[1];
                        O[0] = a;
                        O[1] = b;
                        I += is0;
                        O += os0;
                        --n0;
                   }
                   break;
              }
              /* fall through */
         case 4:
              while (n0 > 0) {
                   R a = I[0];
                   R b = I[1];
                   R c = I[2];
                   R d = I[3];
                   O[0] = a;
                   O[1] = b;
                   O[2] = c;
                   O[3] = d;
                   I += is0;
                   O += os0;
                   --n0;
              }
              break;
         default:
              for (i0 = 0; i0 < n0; ++i0) {
                   for (v = 0; v < vl; ++v)
                        O[i0 * os0 + v] = I[i0 * is0 + v];
              }
              break;
     }
}
|
||||
68
fftw-3.3.10/kernel/cpy2d-pair.c
Normal file
68
fftw-3.3.10/kernel/cpy2d-pair.c
Normal file
@@ -0,0 +1,68 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
/* out of place copy routines for pairs of isomorphic 2D arrays */
|
||||
#include "kernel/ifftw.h"
|
||||
|
||||
/*
 * Copy two 2D arrays that share one geometry: I0 -> O0 and I1 -> O1.
 * Dimension 0 (n0, is0, os0) is the inner loop and dimension 1
 * (n1, is1, os1) the outer loop.  Both inputs are read before either
 * output is written.
 */
void X(cpy2d_pair)(R *I0, R *I1, R *O0, R *O1,
                   INT n0, INT is0, INT os0,
                   INT n1, INT is1, INT os1)
{
     INT i0, i1;

     for (i1 = 0; i1 < n1; ++i1) {
          for (i0 = 0; i0 < n0; ++i0) {
               const INT src = i0 * is0 + i1 * is1;
               const INT dst = i0 * os0 + i1 * os1;
               R a = I0[src];
               R b = I1[src];

               O0[dst] = a;
               O1[dst] = b;
          }
     }
}
|
||||
|
||||
/* Store 0 into n0 elements, spaced os0 apart, of both O0 and O1. */
void X(zero1d_pair)(R *O0, R *O1, INT n0, INT os0)
{
     INT i0 = 0;

     while (i0 < n0) {
          const INT off = i0 * os0;

          O0[off] = 0;
          O1[off] = 0;
          ++i0;
     }
}
|
||||
|
||||
/* like cpy2d_pair, but read input contiguously if possible */
|
||||
void X(cpy2d_pair_ci)(R *I0, R *I1, R *O0, R *O1,
|
||||
INT n0, INT is0, INT os0,
|
||||
INT n1, INT is1, INT os1)
|
||||
{
|
||||
if (IABS(is0) < IABS(is1)) /* inner loop is for n0 */
|
||||
X(cpy2d_pair) (I0, I1, O0, O1, n0, is0, os0, n1, is1, os1);
|
||||
else
|
||||
X(cpy2d_pair) (I0, I1, O0, O1, n1, is1, os1, n0, is0, os0);
|
||||
}
|
||||
|
||||
/* like cpy2d_pair, but write output contiguously if possible */
|
||||
void X(cpy2d_pair_co)(R *I0, R *I1, R *O0, R *O1,
|
||||
INT n0, INT is0, INT os0,
|
||||
INT n1, INT is1, INT os1)
|
||||
{
|
||||
if (IABS(os0) < IABS(os1)) /* inner loop is for n0 */
|
||||
X(cpy2d_pair) (I0, I1, O0, O1, n0, is0, os0, n1, is1, os1);
|
||||
else
|
||||
X(cpy2d_pair) (I0, I1, O0, O1, n1, is1, os1, n0, is0, os0);
|
||||
}
|
||||
207
fftw-3.3.10/kernel/cpy2d.c
Normal file
207
fftw-3.3.10/kernel/cpy2d.c
Normal file
@@ -0,0 +1,207 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
/* out of place 2D copy routines */
|
||||
#include "kernel/ifftw.h"
|
||||
|
||||
#if defined(__x86_64__) || defined(_M_X64) || defined(_M_AMD64)
|
||||
# ifdef HAVE_XMMINTRIN_H
|
||||
# include <xmmintrin.h>
|
||||
# define WIDE_TYPE __m128
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifndef WIDE_TYPE
|
||||
/* fall back to double, which means that WIDE_TYPE will be unused */
|
||||
# define WIDE_TYPE double
|
||||
#endif
|
||||
|
||||
void X(cpy2d)(R *I, R *O,
|
||||
INT n0, INT is0, INT os0,
|
||||
INT n1, INT is1, INT os1,
|
||||
INT vl)
|
||||
{
|
||||
INT i0, i1, v;
|
||||
|
||||
switch (vl) {
|
||||
case 1:
|
||||
for (i1 = 0; i1 < n1; ++i1)
|
||||
for (i0 = 0; i0 < n0; ++i0) {
|
||||
R x0 = I[i0 * is0 + i1 * is1];
|
||||
O[i0 * os0 + i1 * os1] = x0;
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
if (1
|
||||
&& (2 * sizeof(R) == sizeof(WIDE_TYPE))
|
||||
&& (sizeof(WIDE_TYPE) > sizeof(double))
|
||||
&& (((size_t)I) % sizeof(WIDE_TYPE) == 0)
|
||||
&& (((size_t)O) % sizeof(WIDE_TYPE) == 0)
|
||||
&& ((is0 & 1) == 0)
|
||||
&& ((is1 & 1) == 0)
|
||||
&& ((os0 & 1) == 0)
|
||||
&& ((os1 & 1) == 0)) {
|
||||
/* copy R[2] as WIDE_TYPE if WIDE_TYPE is large
|
||||
enough to hold R[2], and if the input is
|
||||
properly aligned. This is a win when R==double
|
||||
and WIDE_TYPE is 128 bits. */
|
||||
for (i1 = 0; i1 < n1; ++i1)
|
||||
for (i0 = 0; i0 < n0; ++i0) {
|
||||
*(WIDE_TYPE *)&O[i0 * os0 + i1 * os1] =
|
||||
*(WIDE_TYPE *)&I[i0 * is0 + i1 * is1];
|
||||
}
|
||||
} else if (1
|
||||
&& (2 * sizeof(R) == sizeof(double))
|
||||
&& (((size_t)I) % sizeof(double) == 0)
|
||||
&& (((size_t)O) % sizeof(double) == 0)
|
||||
&& ((is0 & 1) == 0)
|
||||
&& ((is1 & 1) == 0)
|
||||
&& ((os0 & 1) == 0)
|
||||
&& ((os1 & 1) == 0)) {
|
||||
/* copy R[2] as double if double is large enough to
|
||||
hold R[2], and if the input is properly aligned.
|
||||
This case applies when R==float */
|
||||
for (i1 = 0; i1 < n1; ++i1)
|
||||
for (i0 = 0; i0 < n0; ++i0) {
|
||||
*(double *)&O[i0 * os0 + i1 * os1] =
|
||||
*(double *)&I[i0 * is0 + i1 * is1];
|
||||
}
|
||||
} else {
|
||||
for (i1 = 0; i1 < n1; ++i1)
|
||||
for (i0 = 0; i0 < n0; ++i0) {
|
||||
R x0 = I[i0 * is0 + i1 * is1];
|
||||
R x1 = I[i0 * is0 + i1 * is1 + 1];
|
||||
O[i0 * os0 + i1 * os1] = x0;
|
||||
O[i0 * os0 + i1 * os1 + 1] = x1;
|
||||
}
|
||||
}
|
||||
break;
|
||||
default:
|
||||
for (i1 = 0; i1 < n1; ++i1)
|
||||
for (i0 = 0; i0 < n0; ++i0)
|
||||
for (v = 0; v < vl; ++v) {
|
||||
R x0 = I[i0 * is0 + i1 * is1 + v];
|
||||
O[i0 * os0 + i1 * os1 + v] = x0;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* like cpy2d, but read input contiguously if possible */
|
||||
void X(cpy2d_ci)(R *I, R *O,
|
||||
INT n0, INT is0, INT os0,
|
||||
INT n1, INT is1, INT os1,
|
||||
INT vl)
|
||||
{
|
||||
if (IABS(is0) < IABS(is1)) /* inner loop is for n0 */
|
||||
X(cpy2d) (I, O, n0, is0, os0, n1, is1, os1, vl);
|
||||
else
|
||||
X(cpy2d) (I, O, n1, is1, os1, n0, is0, os0, vl);
|
||||
}
|
||||
|
||||
/* like cpy2d, but write output contiguously if possible */
|
||||
void X(cpy2d_co)(R *I, R *O,
|
||||
INT n0, INT is0, INT os0,
|
||||
INT n1, INT is1, INT os1,
|
||||
INT vl)
|
||||
{
|
||||
if (IABS(os0) < IABS(os1)) /* inner loop is for n0 */
|
||||
X(cpy2d) (I, O, n0, is0, os0, n1, is1, os1, vl);
|
||||
else
|
||||
X(cpy2d) (I, O, n1, is1, os1, n0, is0, os0, vl);
|
||||
}
|
||||
|
||||
|
||||
/* tiled copy routines */
|
||||
struct cpy2d_closure {
|
||||
R *I, *O;
|
||||
INT is0, os0, is1, os1, vl;
|
||||
R *buf;
|
||||
};
|
||||
|
||||
/*
 * Tile callback: copy the sub-array [n0l, n0u) x [n1l, n1u) directly
 * from input to output with X(cpy2d).  args is a struct cpy2d_closure.
 */
static void dotile(INT n0l, INT n0u, INT n1l, INT n1u, void *args)
{
     struct cpy2d_closure *k = (struct cpy2d_closure *)args;
     R *src = k->I + n0l * k->is0 + n1l * k->is1;
     R *dst = k->O + n0l * k->os0 + n1l * k->os1;
     const INT len0 = n0u - n0l;
     const INT len1 = n1u - n1l;

     X(cpy2d)(src, dst,
              len0, k->is0, k->os0,
              len1, k->is1, k->os1,
              k->vl);
}
|
||||
|
||||
/*
 * Tile callback: copy the sub-array [n0l, n0u) x [n1l, n1u) from input
 * to output by way of k->buf.  Inside the buffer the tile is packed:
 * stride vl along dimension 0 and vl * (n0u - n0l) along dimension 1.
 * args is a struct cpy2d_closure.
 */
static void dotile_buf(INT n0l, INT n0u, INT n1l, INT n1u, void *args)
{
     struct cpy2d_closure *k = (struct cpy2d_closure *)args;
     const INT len0 = n0u - n0l;
     const INT len1 = n1u - n1l;
     const INT bs0 = k->vl;              /* buffer stride, dimension 0 */
     const INT bs1 = k->vl * (n0u - n0l); /* buffer stride, dimension 1 */

     /* copy from I to buf */
     X(cpy2d_ci)(k->I + n0l * k->is0 + n1l * k->is1,
                 k->buf,
                 len0, k->is0, bs0,
                 len1, k->is1, bs1,
                 k->vl);

     /* copy from buf to O */
     X(cpy2d_co)(k->buf,
                 k->O + n0l * k->os0 + n1l * k->os1,
                 len0, bs0, k->os0,
                 len1, bs1, k->os1,
                 k->vl);
}
|
||||
|
||||
|
||||
/*
 * 2D copy performed tile by tile: X(tile2d) is given the range
 * [0, n0) x [0, n1), the tile size from X(compute_tilesz), and dotile
 * as the per-tile callback, which copies with X(cpy2d).
 */
void X(cpy2d_tiled)(R *I, R *O,
                    INT n0, INT is0, INT os0,
                    INT n1, INT is1, INT os1, INT vl)
{
     INT tilesz = X(compute_tilesz)(vl,
                                    1 /* input array */
                                    + 1 /* output array */);
     struct cpy2d_closure closure;

     closure.buf = 0; /* unused */
     closure.vl = vl;
     closure.I = I;
     closure.is0 = is0;
     closure.is1 = is1;
     closure.O = O;
     closure.os0 = os0;
     closure.os1 = os1;

     X(tile2d)(0, n0, 0, n1, tilesz, dotile, &closure);
}
|
||||
|
||||
/*
 * 2D copy performed tile by tile through a local scratch array: each
 * tile is handled by dotile_buf, which copies it into buf and then from
 * buf to the output.  buf occupies CACHESIZE / 2 bytes (rounded down to
 * whole reals); an assertion checks that a tilesz x tilesz tile of
 * vl-element vectors fits in it.
 */
void X(cpy2d_tiledbuf)(R *I, R *O,
                       INT n0, INT is0, INT os0,
                       INT n1, INT is1, INT os1, INT vl)
{
     R buf[CACHESIZE / (2 * sizeof(R))];
     /* input and buffer in cache, or
        output and buffer in cache */
     INT tilesz = X(compute_tilesz)(vl, 2);
     struct cpy2d_closure closure;

     closure.buf = buf;
     closure.vl = vl;
     closure.I = I;
     closure.is0 = is0;
     closure.is1 = is1;
     closure.O = O;
     closure.os0 = os0;
     closure.os1 = os1;

     A(tilesz * tilesz * vl * sizeof(R) <= sizeof(buf));
     X(tile2d)(0, n0, 0, n1, tilesz, dotile_buf, &closure);
}
|
||||
31
fftw-3.3.10/kernel/ct.c
Normal file
31
fftw-3.3.10/kernel/ct.c
Normal file
@@ -0,0 +1,31 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
/* common routines for Cooley-Tukey algorithms */
|
||||
|
||||
#include "kernel/ifftw.h"
|
||||
|
||||
#define POW2P(n) (((n) > 0) && (((n) & ((n) - 1)) == 0))
|
||||
|
||||
/* TRUE if radix-r is ugly for size n */
|
||||
int X(ct_uglyp)(INT min_n, INT v, INT n, INT r)
|
||||
{
|
||||
return (n <= min_n) || (POW2P(n) && (v * (n / r)) <= 4);
|
||||
}
|
||||
564
fftw-3.3.10/kernel/cycle.h
Normal file
564
fftw-3.3.10/kernel/cycle.h
Normal file
@@ -0,0 +1,564 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
||||
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
||||
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
/* machine-dependent cycle counters code. Needs to be inlined. */
|
||||
|
||||
/***************************************************************************/
|
||||
/* To use the cycle counters in your code, simply #include "cycle.h" (this
|
||||
file), and then use the functions/macros:
|
||||
|
||||
ticks getticks(void);
|
||||
|
||||
ticks is an opaque typedef defined below, representing the current time.
|
||||
You extract the elapsed time between two calls to getticks() via:
|
||||
|
||||
double elapsed(ticks t1, ticks t0);
|
||||
|
||||
which returns a double-precision variable in arbitrary units. You
|
||||
are not expected to convert this into human units like seconds; it
|
||||
is intended only for *comparisons* of time intervals.
|
||||
|
||||
(In order to use some of the OS-dependent timer routines like
|
||||
Solaris' gethrtime, you need to paste the autoconf snippet below
|
||||
into your configure.ac file and #include "config.h" before cycle.h,
|
||||
or define the relevant macros manually if you are not using autoconf.)
|
||||
*/
|
||||
|
||||
/***************************************************************************/
|
||||
/* This file uses macros like HAVE_GETHRTIME that are assumed to be
|
||||
defined according to whether the corresponding function/type/header
|
||||
is available on your system. The necessary macros are most
|
||||
conveniently defined if you are using GNU autoconf, via the tests:
|
||||
|
||||
dnl ---------------------------------------------------------------------
|
||||
|
||||
AC_C_INLINE
|
||||
AC_HEADER_TIME
|
||||
AC_CHECK_HEADERS([sys/time.h c_asm.h intrinsics.h mach/mach_time.h])
|
||||
|
||||
AC_CHECK_TYPE([hrtime_t],[AC_DEFINE(HAVE_HRTIME_T, 1, [Define to 1 if hrtime_t is defined in <sys/time.h>])],,[#if HAVE_SYS_TIME_H
|
||||
#include <sys/time.h>
|
||||
#endif])
|
||||
|
||||
AC_CHECK_FUNCS([gethrtime read_real_time time_base_to_time clock_gettime mach_absolute_time])
|
||||
|
||||
dnl Cray UNICOS _rtc() (real-time clock) intrinsic
|
||||
AC_MSG_CHECKING([for _rtc intrinsic])
|
||||
rtc_ok=yes
|
||||
AC_TRY_LINK([#ifdef HAVE_INTRINSICS_H
|
||||
#include <intrinsics.h>
|
||||
#endif], [_rtc()], [AC_DEFINE(HAVE__RTC,1,[Define if you have the UNICOS _rtc() intrinsic.])], [rtc_ok=no])
|
||||
AC_MSG_RESULT($rtc_ok)
|
||||
|
||||
dnl ---------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/***************************************************************************/
|
||||
|
||||
#if TIME_WITH_SYS_TIME
|
||||
# include <sys/time.h>
|
||||
# include <time.h>
|
||||
#else
|
||||
# if HAVE_SYS_TIME_H
|
||||
# include <sys/time.h>
|
||||
# else
|
||||
# include <time.h>
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/*
 * Expands to the definition of elapsed(t1, t0) for platforms where
 * `ticks' can be cast to double: the result is t1 minus t0, each
 * converted to double first.  INL is the compiler's spelling of the
 * inline keyword.
 */
#define INLINE_ELAPSED(INL)                                  \
     static INL double elapsed(ticks t1, ticks t0)           \
     {                                                       \
          return (double)t1 - (double)t0;                    \
     }
|
||||
|
||||
/*----------------------------------------------------------------*/
|
||||
/* Solaris */
|
||||
#if defined(HAVE_GETHRTIME) && defined(HAVE_HRTIME_T) && !defined(HAVE_TICK_COUNTER)
/* ticks are hrtime_t values and getticks is an alias for gethrtime */
typedef hrtime_t ticks;
#define getticks gethrtime

INLINE_ELAPSED(inline)

/* mark a counter as chosen so that the later candidates are skipped */
#define HAVE_TICK_COUNTER
#endif
|
||||
|
||||
/*----------------------------------------------------------------*/
|
||||
/* AIX v. 4+ routines to read the real-time clock or time-base register */
|
||||
#if defined(HAVE_READ_REAL_TIME) && defined(HAVE_TIME_BASE_TO_TIME) && !defined(HAVE_TICK_COUNTER)
typedef timebasestruct_t ticks;

/* Fill a timebasestruct_t with read_real_time() and return it. */
static __inline ticks getticks(void)
{
     ticks now;

     read_real_time(&now, TIMEBASE_SZ);
     return now;
}

/*
 * Time in nanoseconds between t0 and t1.  Both arguments are local
 * copies, so converting them in place with time_base_to_time() does
 * not affect the caller.  tb_high is scaled by 1e9 and tb_low added.
 */
static __inline double elapsed(ticks t1, ticks t0) /* time in nanoseconds */
{
     time_base_to_time(&t1, TIMEBASE_SZ);
     time_base_to_time(&t0, TIMEBASE_SZ);

     return (((double)t1.tb_high - (double)t0.tb_high) * 1.0e9 +
             ((double)t1.tb_low - (double)t0.tb_low));
}

/* mark a counter as chosen so that the later candidates are skipped */
#define HAVE_TICK_COUNTER
#endif
|
||||
|
||||
/*----------------------------------------------------------------*/
|
||||
/*
|
||||
* PowerPC ``cycle'' counter using the time base register.
|
||||
*/
|
||||
#if ((((defined(__GNUC__) && (defined(__powerpc__) || defined(__ppc__))) || (defined(__MWERKS__) && defined(macintosh)))) || (defined(__IBM_GCC_ASM) && (defined(__powerpc__) || defined(__ppc__)))) && !defined(HAVE_TICK_COUNTER)
typedef unsigned long long ticks;

/*
 * Assemble a 64-bit value from the upper (mftbu) and lower (mftb)
 * time-base reads.  The upper half is read before and after the lower
 * half, and the sequence is repeated until both upper reads agree, so
 * that the two halves belong together.
 */
static __inline__ ticks getticks(void)
{
     unsigned int lo, hi_before, hi_after;

     do {
          __asm__ __volatile__ ("mftbu %0" : "=r"(hi_before));
          __asm__ __volatile__ ("mftb %0" : "=r"(lo));
          __asm__ __volatile__ ("mftbu %0" : "=r"(hi_after));
     } while (hi_before != hi_after);

     return (((unsigned long long)hi_before) << 32) | lo;
}

INLINE_ELAPSED(__inline__)

/* mark a counter as chosen so that the later candidates are skipped */
#define HAVE_TICK_COUNTER
#endif
|
||||
|
||||
/* MacOS/Mach (Darwin) time-base register interface (unlike UpTime,
|
||||
from Carbon, requires no additional libraries to be linked). */
|
||||
#if defined(HAVE_MACH_ABSOLUTE_TIME) && defined(HAVE_MACH_MACH_TIME_H) && !defined(HAVE_TICK_COUNTER)
#include <mach/mach_time.h>

/* ticks are uint64_t values and getticks is an alias for
   mach_absolute_time */
typedef uint64_t ticks;
#define getticks mach_absolute_time

INLINE_ELAPSED(__inline__)

/* mark a counter as chosen so that the later candidates are skipped */
#define HAVE_TICK_COUNTER
#endif
|
||||
|
||||
/*----------------------------------------------------------------*/
|
||||
/*
|
||||
* Pentium cycle counter
|
||||
*/
|
||||
#if (defined(__GNUC__) || defined(__ICC)) && defined(__i386__) && !defined(HAVE_TICK_COUNTER)
typedef unsigned long long ticks;

/* Return the 64-bit result of the rdtsc instruction. */
static __inline__ ticks getticks(void)
{
     ticks tsc;

     /* no input, nothing else clobbered */
     __asm__ __volatile__("rdtsc": "=A" (tsc));
     return tsc;
}

INLINE_ELAPSED(__inline__)

/* mark a counter as chosen so that the later candidates are skipped */
#define HAVE_TICK_COUNTER
#define TIME_MIN 5000.0   /* unreliable pentium IV cycle counter */
#endif
|
||||
|
||||
/* Visual C++ -- thanks to Morten Nissov for his help with this */
|
||||
#if _MSC_VER >= 1200 && _M_IX86 >= 500 && !defined(HAVE_TICK_COUNTER)
#include <windows.h>
typedef LARGE_INTEGER ticks;

/* emit the two opcode bytes of rdtsc by hand */
#define RDTSC __asm __emit 0fh __asm __emit 031h /* hack for VC++ 5.0 */

/* Execute rdtsc and store EDX:EAX into the high and low parts of a
   LARGE_INTEGER. */
static __inline ticks getticks(void)
{
     ticks now;

     __asm {
          RDTSC
          mov now.HighPart, edx
          mov now.LowPart, eax
     }
     return now;
}

/* LARGE_INTEGER is not an arithmetic type, so the difference is taken
   on the QuadPart members instead of using INLINE_ELAPSED */
static __inline double elapsed(ticks t1, ticks t0)
{
     return (double)t1.QuadPart - (double)t0.QuadPart;
}

/* mark a counter as chosen so that the later candidates are skipped */
#define HAVE_TICK_COUNTER
#define TIME_MIN 5000.0   /* unreliable pentium IV cycle counter */
#endif
|
||||
|
||||
/*----------------------------------------------------------------*/
|
||||
/*
|
||||
* X86-64 cycle counter
|
||||
*/
|
||||
#if (defined(__GNUC__) || defined(__ICC) || defined(__SUNPRO_C)) && defined(__x86_64__) && !defined(HAVE_TICK_COUNTER)
|
||||
typedef unsigned long long ticks;
|
||||
|
||||
static __inline__ ticks getticks(void)
|
||||
{
|
||||
unsigned a, d;
|
||||
__asm__ __volatile__ ("rdtsc" : "=a" (a), "=d" (d));
|
||||
return ((ticks)a) | (((ticks)d) << 32);
|
||||
}
|
||||
|
||||
INLINE_ELAPSED(__inline__)
|
||||
|
||||
#define HAVE_TICK_COUNTER
|
||||
#define TIME_MIN 5000.0
|
||||
#endif
|
||||
|
||||
/* PGI compiler, courtesy Cristiano Calonaci, Andrea Tarsi, & Roberto Gori.
|
||||
NOTE: this code will fail to link unless you use the -Masmkeyword compiler
|
||||
option (grrr). */
|
||||
#if defined(__PGI) && defined(__x86_64__) && !defined(HAVE_TICK_COUNTER)
typedef unsigned long long ticks;

/*
 * The asm sequence leaves the combined 64-bit counter in %rax.  There
 * is deliberately no return statement.
 * NOTE(review): this presumably relies on %rax being the return
 * register -- confirm against the PGI calling convention.
 */
static ticks getticks(void)
{
     asm(" rdtsc; shl $0x20,%rdx; mov %eax,%eax; or %rdx,%rax; ");
}

INLINE_ELAPSED(__inline__)

/* mark a counter as chosen so that the later candidates are skipped */
#define HAVE_TICK_COUNTER
#define TIME_MIN 5000.0
#endif
|
||||
|
||||
/* Visual C++, courtesy of Dirk Michaelis */
|
||||
#if _MSC_VER >= 1400 && (defined(_M_AMD64) || defined(_M_X64)) && !defined(HAVE_TICK_COUNTER)

#include <intrin.h>
#pragma intrinsic(__rdtsc)

/* ticks are unsigned 64-bit values and getticks is an alias for the
   __rdtsc intrinsic */
typedef unsigned __int64 ticks;
#define getticks __rdtsc

INLINE_ELAPSED(__inline)

/* mark a counter as chosen so that the later candidates are skipped */
#define HAVE_TICK_COUNTER
#define TIME_MIN 5000.0
#endif
|
||||
|
||||
/*----------------------------------------------------------------*/
|
||||
/*
|
||||
* IA64 cycle counter
|
||||
*/
|
||||
|
||||
/* intel's icc/ecc compiler */
|
||||
#if (defined(__EDG_VERSION) || defined(__ECC)) && defined(__ia64__) && !defined(HAVE_TICK_COUNTER)
#include <ia64intrin.h>
typedef unsigned long ticks;

/* Read the register named _IA64_REG_AR_ITC through __getReg. */
static __inline__ ticks getticks(void)
{
     return __getReg(_IA64_REG_AR_ITC);
}

INLINE_ELAPSED(__inline__)

/* mark a counter as chosen so that the later candidates are skipped */
#define HAVE_TICK_COUNTER
#endif
|
||||
|
||||
/* gcc */
|
||||
#if defined(__GNUC__) && defined(__ia64__) && !defined(HAVE_TICK_COUNTER)
|
||||
typedef unsigned long ticks;
|
||||
|
||||
/* IA64 cycle counter for gcc: move the ar.itc application register into
   a general register with inline asm and return it. */
static __inline__ ticks getticks(void)
{
     ticks itc;

     __asm__ __volatile__ ("mov %0=ar.itc" : "=r"(itc));
     return itc;
}
|
||||
|
||||
INLINE_ELAPSED(__inline__)
|
||||
|
||||
#define HAVE_TICK_COUNTER
|
||||
#endif
|
||||
|
||||
/* HP/UX IA64 compiler, courtesy Teresa L. Johnson: */
|
||||
#if defined(__hpux) && defined(__ia64) && !defined(HAVE_TICK_COUNTER)
|
||||
#include <machine/sys/inline.h>
|
||||
typedef unsigned long ticks;
|
||||
|
||||
static inline ticks getticks(void)
|
||||
{
|
||||
ticks ret;
|
||||
|
||||
ret = _Asm_mov_from_ar (_AREG_ITC);
|
||||
return ret;
|
||||
}
|
||||
|
||||
INLINE_ELAPSED(inline)
|
||||
|
||||
#define HAVE_TICK_COUNTER
|
||||
#endif
|
||||
|
||||
/* Microsoft Visual C++ */
|
||||
#if defined(_MSC_VER) && defined(_M_IA64) && !defined(HAVE_TICK_COUNTER)
|
||||
typedef unsigned __int64 ticks;
|
||||
|
||||
# ifdef __cplusplus
|
||||
extern "C"
|
||||
# endif
|
||||
ticks __getReg(int whichReg);
|
||||
#pragma intrinsic(__getReg)
|
||||
|
||||
/* IA64 cycle counter for Microsoft Visual C++: read register 3116 through
   the __getReg intrinsic.  The value goes through a volatile temporary,
   as in the historical code.
   NOTE(review): 3116 is presumably the register id of ar.itc -- confirm. */
static __inline ticks getticks(void)
{
     volatile ticks itc = __getReg(3116);
     return itc;
}
|
||||
|
||||
INLINE_ELAPSED(inline)
|
||||
|
||||
#define HAVE_TICK_COUNTER
|
||||
#endif
|
||||
|
||||
/*----------------------------------------------------------------*/
|
||||
/*
|
||||
* PA-RISC cycle counter
|
||||
*/
|
||||
#if (defined(__hppa__) || defined(__hppa)) && !defined(HAVE_TICK_COUNTER)
|
||||
typedef unsigned long ticks;
|
||||
|
||||
# ifdef __GNUC__
|
||||
/* PA-RISC cycle counter (gcc): read control register 16 with MFCTL.
   The asm has one output, no inputs and no clobbers. */
static __inline__ ticks getticks(void)
{
     ticks cr16;

     __asm__ __volatile__("mfctl 16, %0": "=r" (cr16));
     return cr16;
}
|
||||
# else
|
||||
# include <machine/inline.h>
|
||||
static inline unsigned long getticks(void)
|
||||
{
|
||||
register ticks ret;
|
||||
_MFCTL(16, ret);
|
||||
return ret;
|
||||
}
|
||||
# endif
|
||||
|
||||
INLINE_ELAPSED(inline)
|
||||
|
||||
#define HAVE_TICK_COUNTER
|
||||
#endif
|
||||
|
||||
/*----------------------------------------------------------------*/
|
||||
/* S390, courtesy of James Treacy */
|
||||
#if defined(__GNUC__) && defined(__s390__) && !defined(HAVE_TICK_COUNTER)
|
||||
typedef unsigned long long ticks;
|
||||
|
||||
/* S390 tick counter: the STCK instruction stores the clock value into the
   local variable whose address is passed to the asm; the "memory" and
   "cc" clobbers tell the compiler about that store and the changed
   condition code. */
static __inline__ ticks getticks(void)
{
     ticks clk;

     __asm__("stck 0(%0)" : : "a" (&clk) : "memory", "cc");
     return clk;
}
|
||||
|
||||
INLINE_ELAPSED(__inline__)
|
||||
|
||||
#define HAVE_TICK_COUNTER
|
||||
#endif
|
||||
/*----------------------------------------------------------------*/
|
||||
#if defined(__GNUC__) && defined(__alpha__) && !defined(HAVE_TICK_COUNTER)
|
||||
/*
|
||||
* The 32-bit cycle counter on alpha overflows pretty quickly,
|
||||
* unfortunately. A 1GHz machine overflows in 4 seconds.
|
||||
*/
|
||||
typedef unsigned int ticks;
|
||||
|
||||
/* Alpha cycle counter (gcc): execute RPCC and keep only the low 32 bits
   of the result, which is what the 32-bit `ticks' type holds. */
static __inline__ ticks getticks(void)
{
     unsigned long raw;

     __asm__ __volatile__ ("rpcc %0" : "=r"(raw));
     return (raw & 0xFFFFFFFF);
}
|
||||
|
||||
INLINE_ELAPSED(__inline__)
|
||||
|
||||
#define HAVE_TICK_COUNTER
|
||||
#endif
|
||||
|
||||
/*----------------------------------------------------------------*/
|
||||
#if defined(__GNUC__) && defined(__sparc_v9__) && !defined(HAVE_TICK_COUNTER)
|
||||
typedef unsigned long ticks;
|
||||
|
||||
/* SPARC v9 cycle counter (gcc): read the %tick register. */
static __inline__ ticks getticks(void)
{
     ticks tick;

     __asm__ __volatile__("rd %%tick, %0" : "=r" (tick));
     return tick;
}
|
||||
|
||||
INLINE_ELAPSED(__inline__)
|
||||
|
||||
#define HAVE_TICK_COUNTER
|
||||
#endif
|
||||
|
||||
/*----------------------------------------------------------------*/
|
||||
#if (defined(__DECC) || defined(__DECCXX)) && defined(__alpha) && defined(HAVE_C_ASM_H) && !defined(HAVE_TICK_COUNTER)
|
||||
# include <c_asm.h>
|
||||
typedef unsigned int ticks;
|
||||
|
||||
/* Alpha cycle counter (DEC C): run RPCC through the c_asm.h asm()
   intrinsic and keep only the low 32 bits of the result. */
static __inline ticks getticks(void)
{
     unsigned long raw = asm("rpcc %v0");

     return (raw & 0xFFFFFFFF);
}
|
||||
|
||||
INLINE_ELAPSED(__inline)
|
||||
|
||||
#define HAVE_TICK_COUNTER
|
||||
#endif
|
||||
/*----------------------------------------------------------------*/
|
||||
/* SGI/Irix */
|
||||
#if defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_SGI_CYCLE) && !defined(HAVE_TICK_COUNTER) && !defined(__ANDROID__)
|
||||
typedef struct timespec ticks;
|
||||
|
||||
static inline ticks getticks(void)
|
||||
{
|
||||
struct timespec t;
|
||||
clock_gettime(CLOCK_SGI_CYCLE, &t);
|
||||
return t;
|
||||
}
|
||||
|
||||
/* Difference t1 - t0 between two timespec samples, in nanoseconds.
   Seconds and nanoseconds are differenced separately in double
   precision and then combined. */
static inline double elapsed(ticks t1, ticks t0)
{
     const double dsec = (double)t1.tv_sec - (double)t0.tv_sec;
     const double dnsec = (double)t1.tv_nsec - (double)t0.tv_nsec;

     return dsec * 1.0E9 + dnsec;
}
|
||||
#define HAVE_TICK_COUNTER
|
||||
#endif
|
||||
|
||||
/*----------------------------------------------------------------*/
|
||||
/* Cray UNICOS _rtc() intrinsic function */
|
||||
#if defined(HAVE__RTC) && !defined(HAVE_TICK_COUNTER)
|
||||
#ifdef HAVE_INTRINSICS_H
|
||||
# include <intrinsics.h>
|
||||
#endif
|
||||
|
||||
typedef long long ticks;
|
||||
|
||||
#define getticks _rtc
|
||||
|
||||
INLINE_ELAPSED(inline)
|
||||
|
||||
#define HAVE_TICK_COUNTER
|
||||
#endif
|
||||
|
||||
/*----------------------------------------------------------------*/
|
||||
/* MIPS ZBus */
|
||||
#if HAVE_MIPS_ZBUS_TIMER
|
||||
#if defined(__mips__) && !defined(HAVE_TICK_COUNTER)
|
||||
#include <sys/mman.h>
|
||||
#include <unistd.h>
|
||||
#include <fcntl.h>
|
||||
|
||||
typedef uint64_t ticks;
|
||||
|
||||
/* Read the MIPS ZBus timer through a read-only mapping of /dev/mem.
 *
 * On the first successful call one page at offset 0x10030000 of /dev/mem
 * is mapped and the mapping is cached in a function-local static for all
 * later calls.
 *
 * Returns the 64-bit value read from the mapped address.  If /dev/mem
 * cannot be opened or mapped, the error is reported with perror() and 0
 * is returned.  The cache is only filled after a successful mmap(), so a
 * failed attempt is retried on the next call instead of dereferencing
 * mmap's failure value.
 */
static inline ticks getticks(void)
{
     static uint64_t *addr = 0;

     if (addr == 0) {
          const uint32_t rq_addr = 0x10030000;
          void *map;
          int fd;
          int pgsize;

          pgsize = getpagesize();
          fd = open("/dev/mem", O_RDONLY | O_SYNC, 0);
          if (fd < 0) {
               perror("open");
               return 0;
          }

          map = mmap(0, pgsize, PROT_READ, MAP_SHARED, fd, rq_addr);
          if (map == MAP_FAILED) {
               /* report before close() so that errno still belongs to mmap */
               perror("mmap");
               close(fd);
               return 0;
          }

          /* the mapping stays valid after the descriptor is closed */
          close(fd);
          addr = (uint64_t *) map;
     }

     /* device memory: force a real load on every call */
     return *(volatile uint64_t *) addr;
}
|
||||
|
||||
INLINE_ELAPSED(inline)
|
||||
|
||||
#define HAVE_TICK_COUNTER
|
||||
#endif
|
||||
#endif /* HAVE_MIPS_ZBUS_TIMER */
|
||||
|
||||
#if defined(HAVE_ARMV7A_CNTVCT)
|
||||
typedef uint64_t ticks;
|
||||
static inline ticks getticks(void)
|
||||
{
|
||||
uint32_t Rt, Rt2 = 0;
|
||||
asm volatile("mrrc p15, 1, %0, %1, c14" : "=r"(Rt), "=r"(Rt2));
|
||||
return ((uint64_t)Rt) | (((uint64_t)Rt2) << 32);
|
||||
}
|
||||
INLINE_ELAPSED(inline)
|
||||
#define HAVE_TICK_COUNTER
|
||||
#endif
|
||||
|
||||
#if defined(HAVE_ARMV7A_PMCCNTR)
|
||||
typedef uint64_t ticks;
|
||||
static inline ticks getticks(void)
|
||||
{
|
||||
uint32_t r;
|
||||
asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(r) );
|
||||
return r;
|
||||
}
|
||||
INLINE_ELAPSED(inline)
|
||||
#define HAVE_TICK_COUNTER
|
||||
#endif
|
||||
|
||||
#if defined(__aarch64__) && defined(HAVE_ARMV8_CNTVCT_EL0) && !defined(HAVE_ARMV8_PMCCNTR_EL0)
|
||||
typedef uint64_t ticks;
|
||||
static inline ticks getticks(void)
|
||||
{
|
||||
uint64_t Rt;
|
||||
asm volatile("mrs %0, CNTVCT_EL0" : "=r" (Rt));
|
||||
return Rt;
|
||||
}
|
||||
INLINE_ELAPSED(inline)
|
||||
#define HAVE_TICK_COUNTER
|
||||
#endif
|
||||
|
||||
#if defined(__aarch64__) && defined(HAVE_ARMV8_PMCCNTR_EL0)
|
||||
typedef uint64_t ticks;
|
||||
static inline ticks getticks(void)
|
||||
{
|
||||
uint64_t cc = 0;
|
||||
asm volatile("mrs %0, PMCCNTR_EL0" : "=r"(cc));
|
||||
return cc;
|
||||
}
|
||||
INLINE_ELAPSED(inline)
|
||||
#define HAVE_TICK_COUNTER
|
||||
#endif
|
||||
53
fftw-3.3.10/kernel/debug.c
Normal file
53
fftw-3.3.10/kernel/debug.c
Normal file
@@ -0,0 +1,53 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
#include "kernel/ifftw.h"
|
||||
|
||||
#ifdef FFTW_DEBUG
|
||||
#include <stdio.h>
|
||||
|
||||
/* A printer that writes to a stdio stream.  `super' must stay the first
   member: putchr_file() casts the printer pointer it receives back to
   P_file, and mkprinter_file() hands out &super as the printer. */
typedef struct {
|
||||
printer super;
|
||||
/* destination stream for every character emitted by putchr_file() */
FILE *f;
|
||||
} P_file;
|
||||
|
||||
/* Character-output callback of the FILE-backed printer: p_ is really a
   P_file, and c is written to its stream with fputc().  The result of
   fputc() is not checked. */
static void putchr_file(printer *p_, char c)
{
     FILE *out = ((P_file *) p_)->f;

     fputc(c, out);
}
|
||||
|
||||
static printer *mkprinter_file(FILE *f)
|
||||
{
|
||||
P_file *p = (P_file *) X(mkprinter)(sizeof(P_file), putchr_file, 0);
|
||||
p->f = f;
|
||||
return &p->super;
|
||||
}
|
||||
|
||||
void X(debug)(const char *format, ...)
|
||||
{
|
||||
va_list ap;
|
||||
printer *p = mkprinter_file(stderr);
|
||||
va_start(ap, format);
|
||||
p->vprint(p, format, ap);
|
||||
va_end(ap);
|
||||
X(printer_destroy)(p);
|
||||
}
|
||||
#endif
|
||||
36
fftw-3.3.10/kernel/extract-reim.c
Normal file
36
fftw-3.3.10/kernel/extract-reim.c
Normal file
@@ -0,0 +1,36 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
#include "kernel/ifftw.h"
|
||||
|
||||
/* decompose complex pointer into real and imaginary parts.
|
||||
Flip real and imaginary if the sign does not match
|
||||
FFTW's idea of what the sign should be */
|
||||
|
||||
/* Split the interleaved complex pointer c into pointers to its two
   components.  When sign equals FFT_SIGN, *r gets c and *i gets c + 1;
   for any other sign the two are swapped. */
void X(extract_reim)(int sign, R *c, R **r, R **i)
{
     const int swapped = (sign != FFT_SIGN);

     *r = c + (swapped ? 1 : 0);
     *i = c + (swapped ? 0 : 1);
}
|
||||
31
fftw-3.3.10/kernel/hash.c
Normal file
31
fftw-3.3.10/kernel/hash.c
Normal file
@@ -0,0 +1,31 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
#include "kernel/ifftw.h"
|
||||
|
||||
/* Hash a NUL-terminated string.  Starting from 0xDEADBEEF, each byte is
   folded in as h = h * 17 + byte; the terminating NUL is folded in as
   well before the result is returned. */
unsigned X(hash)(const char *s)
{
     unsigned acc = 0xDEADBEEFu;

     for (;;) {
          acc = acc * 17 + (unsigned)(*s & 0xFF);
          if (*s == '\0')
               break; /* the terminator has just been hashed */
          ++s;
     }
     return acc;
}
|
||||
|
||||
27
fftw-3.3.10/kernel/iabs.c
Normal file
27
fftw-3.3.10/kernel/iabs.c
Normal file
@@ -0,0 +1,27 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include "kernel/ifftw.h"
|
||||
|
||||
/* Absolute value of an INT: negative arguments are returned as 0 - a,
   all others unchanged. */
INT X(iabs)(INT a)
{
     if (a < 0)
          return 0 - a;
     return a;
}
|
||||
1143
fftw-3.3.10/kernel/ifftw.h
Normal file
1143
fftw-3.3.10/kernel/ifftw.h
Normal file
File diff suppressed because it is too large
Load Diff
144
fftw-3.3.10/kernel/kalloc.c
Normal file
144
fftw-3.3.10/kernel/kalloc.c
Normal file
@@ -0,0 +1,144 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include "kernel/ifftw.h"
|
||||
|
||||
#if defined(HAVE_MALLOC_H)
|
||||
# include <malloc.h>
|
||||
#endif
|
||||
|
||||
/* ``kernel'' malloc(), with proper memory alignment */
|
||||
|
||||
#if defined(HAVE_DECL_MEMALIGN) && !HAVE_DECL_MEMALIGN
|
||||
extern void *memalign(size_t, size_t);
|
||||
#endif
|
||||
|
||||
#if defined(HAVE_DECL_POSIX_MEMALIGN) && !HAVE_DECL_POSIX_MEMALIGN
|
||||
extern int posix_memalign(void **, size_t, size_t);
|
||||
#endif
|
||||
|
||||
#if defined(macintosh) /* MacOS 9 */
|
||||
# include <Multiprocessing.h>
|
||||
#endif
|
||||
|
||||
#define real_free free /* memalign and malloc use ordinary free */
|
||||
|
||||
#define IS_POWER_OF_TWO(n) (((n) > 0) && (((n) & ((n) - 1)) == 0))
|
||||
#if defined(WITH_OUR_MALLOC) && (MIN_ALIGNMENT >= 8) && IS_POWER_OF_TWO(MIN_ALIGNMENT)
|
||||
/* Our own MIN_ALIGNMENT-aligned malloc/free. Assumes sizeof(void*) is a
|
||||
power of two <= 8 and that malloc is at least sizeof(void*)-aligned.
|
||||
|
||||
The main reason for this routine is that, as of this writing,
|
||||
Windows does not include any aligned allocation routines in its
|
||||
system libraries, and instead provides an implementation with a
|
||||
Visual C++ "Processor Pack" that you have to statically link into
|
||||
your program. We do not want to require users to have VC++
|
||||
(e.g. gcc/MinGW should be fine). Our code should be at least as good
|
||||
as the MS _aligned_malloc, in any case, according to second-hand
|
||||
reports of the algorithm it employs (also based on plain malloc). */
|
||||
/* Allocate n bytes aligned to MIN_ALIGNMENT on top of plain malloc().
   MIN_ALIGNMENT extra bytes are requested, the returned address is
   rounded up to the next multiple of MIN_ALIGNMENT strictly above the
   malloc'ed pointer, and that original pointer is stashed in the
   pointer-sized slot just below the aligned address so that our_free()
   can recover it.  Returns a null pointer if malloc() fails or if
   n + MIN_ALIGNMENT does not fit in a size_t. */
static void *our_malloc(size_t n)
{
     void *p0, *p;

     /* a wrapped-around sum would silently allocate a too-small buffer */
     if (n > (size_t) -1 - (size_t) MIN_ALIGNMENT) return (void *) 0;

     if (!(p0 = malloc(n + MIN_ALIGNMENT))) return (void *) 0;
     p = (void *) (((uintptr_t) p0 + MIN_ALIGNMENT) & (~((uintptr_t) (MIN_ALIGNMENT - 1))));
     *((void **) p - 1) = p0;
     return p;
}

/* Release a block obtained from our_malloc(): the pointer handed to
   free() is the original malloc() result stored just below p.  A null p
   is ignored (the guard is required because the slot below p is read). */
static void our_free(void *p)
{
     if (p) free(*((void **) p - 1));
}
|
||||
#endif
|
||||
|
||||
void *X(kernel_malloc)(size_t n)
|
||||
{
|
||||
void *p;
|
||||
|
||||
#if defined(MIN_ALIGNMENT)
|
||||
|
||||
# if defined(WITH_OUR_MALLOC)
|
||||
p = our_malloc(n);
|
||||
# undef real_free
|
||||
# define real_free our_free
|
||||
|
||||
# elif defined(__FreeBSD__) && (MIN_ALIGNMENT <= 16)
|
||||
/* FreeBSD does not have memalign, but its malloc is 16-byte aligned. */
|
||||
p = malloc(n);
|
||||
|
||||
# elif (defined(__MACOSX__) || defined(__APPLE__)) && (MIN_ALIGNMENT <= 16)
|
||||
/* MacOS X malloc is already 16-byte aligned */
|
||||
p = malloc(n);
|
||||
|
||||
# elif defined(HAVE_MEMALIGN)
|
||||
p = memalign(MIN_ALIGNMENT, n);
|
||||
|
||||
# elif defined(HAVE_POSIX_MEMALIGN)
|
||||
/* note: posix_memalign is broken in glibc 2.2.5: it constrains
|
||||
the size, not the alignment, to be (power of two) * sizeof(void*).
|
||||
The bug seems to have been fixed as of glibc 2.3.1. */
|
||||
if (posix_memalign(&p, MIN_ALIGNMENT, n))
|
||||
p = (void*) 0;
|
||||
|
||||
# elif defined(__ICC) || defined(__INTEL_COMPILER) || defined(HAVE__MM_MALLOC)
|
||||
/* Intel's C compiler defines _mm_malloc and _mm_free intrinsics */
|
||||
p = (void *) _mm_malloc(n, MIN_ALIGNMENT);
|
||||
# undef real_free
|
||||
# define real_free _mm_free
|
||||
|
||||
# elif defined(_MSC_VER)
|
||||
/* MS Visual C++ 6.0 with a "Processor Pack" supports SIMD
|
||||
and _aligned_malloc/free (uses malloc.h) */
|
||||
p = (void *) _aligned_malloc(n, MIN_ALIGNMENT);
|
||||
# undef real_free
|
||||
# define real_free _aligned_free
|
||||
|
||||
# elif defined(macintosh) /* MacOS 9 */
|
||||
p = (void *) MPAllocateAligned(n,
|
||||
# if MIN_ALIGNMENT == 8
|
||||
kMPAllocate8ByteAligned,
|
||||
# elif MIN_ALIGNMENT == 16
|
||||
kMPAllocate16ByteAligned,
|
||||
# elif MIN_ALIGNMENT == 32
|
||||
kMPAllocate32ByteAligned,
|
||||
# else
|
||||
# error "Unknown alignment for MPAllocateAligned"
|
||||
# endif
|
||||
0);
|
||||
# undef real_free
|
||||
# define real_free MPFree
|
||||
|
||||
# else
|
||||
/* Add your machine here and send a patch to fftw@fftw.org
|
||||
or (e.g. for Windows) configure --with-our-malloc */
|
||||
# error "Don't know how to malloc() aligned memory ... try configuring --with-our-malloc"
|
||||
# endif
|
||||
|
||||
#else /* !defined(MIN_ALIGNMENT) */
|
||||
p = malloc(n);
|
||||
#endif
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
/* Release memory obtained from X(kernel_malloc).  real_free is a macro:
   it starts out as plain free and may be redefined inside
   X(kernel_malloc) above to the deallocator that matches the allocation
   strategy chosen at compile time. */
void X(kernel_free)(void *p)
|
||||
{
|
||||
real_free(p);
|
||||
}
|
||||
54
fftw-3.3.10/kernel/md5-1.c
Normal file
54
fftw-3.3.10/kernel/md5-1.c
Normal file
@@ -0,0 +1,54 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
#include "kernel/ifftw.h"
|
||||
|
||||
|
||||
/* Feed len raw bytes starting at d_ into the md5 state p, one byte at a
   time and in increasing address order, via X(md5putc). */
void X(md5putb)(md5 *p, const void *d_, size_t len)
{
     const unsigned char *cur = (const unsigned char *)d_;
     size_t left;

     for (left = len; left > 0; --left)
          X(md5putc)(p, *cur++);
}
|
||||
|
||||
/* Feed the NUL-terminated string s into the md5 state p.  The
   terminating '\0' is hashed too. */
void X(md5puts)(md5 *p, const char *s)
{
     for (;;) {
          X(md5putc)(p, (unsigned)(*s & 0xFF));
          if (*s == '\0')
               break; /* terminator has been hashed; done */
          ++s;
     }
}
|
||||
|
||||
/* Hash the object representation (all sizeof(int) bytes, in memory
   order) of the int i. */
void X(md5int)(md5 *p, int i)
{
     const void *bytes = &i;

     X(md5putb)(p, bytes, sizeof i);
}
|
||||
|
||||
/* Hash the object representation (all sizeof(INT) bytes, in memory
   order) of the INT i. */
void X(md5INT)(md5 *p, INT i)
{
     const void *bytes = &i;

     X(md5putb)(p, bytes, sizeof i);
}
|
||||
|
||||
/* Hash the object representation (all sizeof(unsigned) bytes, in memory
   order) of the unsigned i. */
void X(md5unsigned)(md5 *p, unsigned i)
{
     const void *bytes = &i;

     X(md5putb)(p, bytes, sizeof i);
}
|
||||
|
||||
142
fftw-3.3.10/kernel/md5.c
Normal file
142
fftw-3.3.10/kernel/md5.c
Normal file
@@ -0,0 +1,142 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
independent implementation of Ron Rivest's MD5 message-digest
|
||||
algorithm, based on rfc 1321.
|
||||
|
||||
Optimized for small code size, not speed. Works as long as
|
||||
sizeof(md5uint) >= 4.
|
||||
*/
|
||||
|
||||
#include "kernel/ifftw.h"
|
||||
|
||||
/* sintab[i] = integer part of 4294967296.0 * abs(sin((double)(i + 1))) */
|
||||
static const md5uint sintab[64] = {
|
||||
0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee,
|
||||
0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
|
||||
0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be,
|
||||
0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,
|
||||
0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa,
|
||||
0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
|
||||
0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed,
|
||||
0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,
|
||||
0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c,
|
||||
0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
|
||||
0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05,
|
||||
0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
|
||||
0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039,
|
||||
0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
|
||||
0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,
|
||||
0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391
|
||||
};
|
||||
|
||||
/* see rfc 1321 section 3.4 */
|
||||
static const struct roundtab {
|
||||
char k;
|
||||
char s;
|
||||
} roundtab[64] = {
|
||||
{ 0, 7}, { 1, 12}, { 2, 17}, { 3, 22},
|
||||
{ 4, 7}, { 5, 12}, { 6, 17}, { 7, 22},
|
||||
{ 8, 7}, { 9, 12}, { 10, 17}, { 11, 22},
|
||||
{ 12, 7}, { 13, 12}, { 14, 17}, { 15, 22},
|
||||
{ 1, 5}, { 6, 9}, { 11, 14}, { 0, 20},
|
||||
{ 5, 5}, { 10, 9}, { 15, 14}, { 4, 20},
|
||||
{ 9, 5}, { 14, 9}, { 3, 14}, { 8, 20},
|
||||
{ 13, 5}, { 2, 9}, { 7, 14}, { 12, 20},
|
||||
{ 5, 4}, { 8, 11}, { 11, 16}, { 14, 23},
|
||||
{ 1, 4}, { 4, 11}, { 7, 16}, { 10, 23},
|
||||
{ 13, 4}, { 0, 11}, { 3, 16}, { 6, 23},
|
||||
{ 9, 4}, { 12, 11}, { 15, 16}, { 2, 23},
|
||||
{ 0, 6}, { 7, 10}, { 14, 15}, { 5, 21},
|
||||
{ 12, 6}, { 3, 10}, { 10, 15}, { 1, 21},
|
||||
{ 8, 6}, { 15, 10}, { 6, 15}, { 13, 21},
|
||||
{ 4, 6}, { 11, 10}, { 2, 15}, { 9, 21}
|
||||
};
|
||||
|
||||
/* Rotate the low 32 bits of `a' left by `s' bits (0 < s < 32).  If the
   type of `a' is wider than 32 bits the result may carry garbage above
   bit 31; callers mask it off.  Both arguments are fully parenthesized
   so that expressions such as rol(x | y, n) expand correctly; `a' is
   evaluated twice. */
#define rol(a, s) (((a) << (int)(s)) | ((a) >> (32 - (int)(s))))
|
||||
|
||||
/* Process one 64-byte block of input (rfc 1321 section 3.4).
   `data' is decoded as sixteen little-endian 32-bit words; the 64 steps
   then update the four working words, using roundtab[] for the message
   word index and rotation amount of each step and sintab[] for the
   additive constant.  Finally the working words are added into `state'.
   Everything is masked to 32 bits so that an md5uint wider than 32 bits
   still produces the right answer. */
static void doblock(md5sig state, const unsigned char *data)
{
     const md5uint msk = (md5uint)0xffffffffUL;
     md5uint a, b, c, d, x[16];
     int i;

     /* encode input bytes into md5uint */
     for (i = 0; i < 16; ++i) {
          const unsigned char *q = data + 4 * i;
          x[i] = (unsigned)q[0]
               | ((unsigned)q[1] << 8)
               | ((unsigned)q[2] << 16)
               | ((unsigned)q[3] << 24);
     }

     a = state[0];
     b = state[1];
     c = state[2];
     d = state[3];

     for (i = 0; i < 64; ++i) {
          const struct roundtab *rt = &roundtab[i];
          md5uint f = 0; /* value of this round's auxiliary function */
          md5uint t;

          switch (i >> 4) {
              case 0: f = (b & c) | (~b & d); break;
              case 1: f = (b & d) | (c & ~d); break;
              case 2: f = b ^ c ^ d; break;
              case 3: f = c ^ (b | ~d); break;
          }

          a = (a + f + sintab[i] + x[(int)(rt->k)]) & msk;
          t = b + rol(a, rt->s);

          /* rotate the roles of the four working words */
          a = d;
          d = c;
          c = b;
          b = t;
     }

     state[0] = (state[0] + a) & msk;
     state[1] = (state[1] + b) & msk;
     state[2] = (state[2] + c) & msk;
     state[3] = (state[3] + d) & msk;
}
|
||||
|
||||
|
||||
/* Reset the md5 state p: load the four initial signature words and set
   the byte counter to zero. */
void X(md5begin)(md5 *p)
{
     static const md5uint init[4] = {
          0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476
     };
     int k;

     for (k = 0; k < 4; ++k)
          p->s[k] = init[k];
     p->l = 0;
}
|
||||
|
||||
/* Append one byte to the md5 state p.  The byte is stored in the 64-byte
   buffer at position (count mod 64); once the incremented count reaches
   a multiple of 64 the full buffer is run through doblock(). */
void X(md5putc)(md5 *p, unsigned char c)
{
     p->c[p->l % 64] = c;
     ++p->l;
     if ((p->l % 64) == 0)
          doblock(p->s, p->c);
}
|
||||
|
||||
/* Finish the digest: append the rfc 1321 padding and the message length.
   The length in bits is captured (in an unsigned) before any padding is
   added, then a 0x80 byte and zero bytes are appended until the byte
   count is 56 mod 64, and finally eight length bytes are appended, least
   significant first.  On return the byte count is a multiple of 64 and
   the signature is in p->s. */
void X(md5end)(md5 *p)
{
     unsigned bits, k;

     bits = 8 * p->l; /* length before padding, in bits */

     /* rfc 1321 section 3.1: padding */
     X(md5putc)(p, 0x80);
     while ((p->l % 64) != 56)
          X(md5putc)(p, 0x00);

     /* rfc 1321 section 3.2: length (little endian) */
     for (k = 0; k < 8; ++k, bits = bits >> 8)
          X(md5putc)(p, (unsigned char)(bits & 0xFF));
}
|
||||
32
fftw-3.3.10/kernel/minmax.c
Normal file
32
fftw-3.3.10/kernel/minmax.c
Normal file
@@ -0,0 +1,32 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include "kernel/ifftw.h"
|
||||
|
||||
/* Larger of two INT values. */
INT X(imax)(INT a, INT b)
{
     if (a > b)
          return a;
     return b;
}
|
||||
|
||||
/* Smaller of two INT values. */
INT X(imin)(INT a, INT b)
{
     if (a < b)
          return a;
     return b;
}
|
||||
62
fftw-3.3.10/kernel/ops.c
Normal file
62
fftw-3.3.10/kernel/ops.c
Normal file
@@ -0,0 +1,62 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include "kernel/ifftw.h"
|
||||
|
||||
/* Clear an operation count: add, mul, fma and other are all set to 0. */
void X(ops_zero)(opcnt *dst)
{
     dst->other = 0;
     dst->fma = 0;
     dst->mul = 0;
     dst->add = 0;
}
|
||||
|
||||
/* Copy the operation count *src into *dst by whole-struct assignment.
   Note the argument order: source first, destination second. */
void X(ops_cpy)(const opcnt *src, opcnt *dst)
|
||||
{
|
||||
*dst = *src;
|
||||
}
|
||||
|
||||
/* Make *dst an operation count consisting of o ``other'' operations and
   nothing else: add, mul and fma are zeroed. */
void X(ops_other)(INT o, opcnt *dst)
{
     dst->add = dst->mul = dst->fma = 0;
     dst->other = o;
}
|
||||
|
||||
/* Field-wise multiply-add of operation counts: *dst = m * (*a) + (*b).
   Each field of dst is computed from the same field of a and b only, so
   dst may be the same object as b (X(ops_add2) and X(ops_madd2) call it
   that way). */
void X(ops_madd)(INT m, const opcnt *a, const opcnt *b, opcnt *dst)
{
#define MADD_FIELD(fld) dst->fld = m * a->fld + b->fld
     MADD_FIELD(add);
     MADD_FIELD(mul);
     MADD_FIELD(fma);
     MADD_FIELD(other);
#undef MADD_FIELD
}
|
||||
|
||||
/* *dst = *a + *b, field by field; implemented as X(ops_madd) with a
   multiplier of 1. */
void X(ops_add)(const opcnt *a, const opcnt *b, opcnt *dst)
|
||||
{
|
||||
X(ops_madd)(1, a, b, dst);
|
||||
}
|
||||
|
||||
/* In-place accumulate: *dst = *a + *dst, field by field (a multiply-add
   with multiplier 1 whose addend and destination are both dst). */
void X(ops_add2)(const opcnt *a, opcnt *dst)
{
     X(ops_madd)(1, a, dst, dst);
}
|
||||
|
||||
/* In-place multiply-accumulate: *dst = m * (*a) + *dst, field by field;
   dst is passed to X(ops_madd) as both the addend and the destination. */
void X(ops_madd2)(INT m, const opcnt *a, opcnt *dst)
|
||||
{
|
||||
X(ops_madd)(m, a, dst, dst);
|
||||
}
|
||||
|
||||
82
fftw-3.3.10/kernel/pickdim.c
Normal file
82
fftw-3.3.10/kernel/pickdim.c
Normal file
@@ -0,0 +1,82 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
#include "kernel/ifftw.h"
|
||||
|
||||
|
||||
/* Given a solver which_dim, a vector sz, and whether or not the
|
||||
transform is out-of-place, return the actual dimension index that
|
||||
it corresponds to. The basic idea here is that we return the
|
||||
which_dim'th valid dimension, starting from the end if
|
||||
which_dim < 0. */
|
||||
static int really_pickdim(int which_dim, const tensor *sz, int oop, int *dp)
|
||||
{
|
||||
int i;
|
||||
int count_ok = 0;
|
||||
if (which_dim > 0) {
|
||||
for (i = 0; i < sz->rnk; ++i) {
|
||||
if (oop || sz->dims[i].is == sz->dims[i].os)
|
||||
if (++count_ok == which_dim) {
|
||||
*dp = i;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (which_dim < 0) {
|
||||
for (i = sz->rnk - 1; i >= 0; --i) {
|
||||
if (oop || sz->dims[i].is == sz->dims[i].os)
|
||||
if (++count_ok == -which_dim) {
|
||||
*dp = i;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
else { /* zero: pick the middle, if valid */
|
||||
i = (sz->rnk - 1) / 2;
|
||||
if (i >= 0 && (oop || sz->dims[i].is == sz->dims[i].os)) {
|
||||
*dp = i;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Like really_pickdim, but only returns 1 if no previous "buddy"
|
||||
which_dim in the buddies list would give the same dim. */
|
||||
int X(pickdim)(int which_dim, const int *buddies, size_t nbuddies,
|
||||
const tensor *sz, int oop, int *dp)
|
||||
{
|
||||
size_t i;
|
||||
int d1;
|
||||
|
||||
if (!really_pickdim(which_dim, sz, oop, dp))
|
||||
return 0;
|
||||
|
||||
/* check whether some buddy solver would produce the same dim.
|
||||
If so, consider this solver unapplicable and let the buddy
|
||||
take care of it. The smallest-indexed buddy is applicable. */
|
||||
for (i = 0; i < nbuddies; ++i) {
|
||||
if (buddies[i] == which_dim)
|
||||
break; /* found self */
|
||||
if (really_pickdim(buddies[i], sz, oop, &d1) && *dp == d1)
|
||||
return 0; /* found equivalent buddy */
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
70
fftw-3.3.10/kernel/plan.c
Normal file
70
fftw-3.3.10/kernel/plan.c
Normal file
@@ -0,0 +1,70 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include "kernel/ifftw.h"
|
||||
|
||||
/* "Plan: To bother about the best method of accomplishing an
|
||||
accidental result." (Ambrose Bierce, The Enlarged Devil's
|
||||
Dictionary). */
|
||||
|
||||
plan *X(mkplan)(size_t size, const plan_adt *adt)
|
||||
{
|
||||
plan *p = (plan *)MALLOC(size, PLANS);
|
||||
|
||||
A(adt->destroy);
|
||||
p->adt = adt;
|
||||
X(ops_zero)(&p->ops);
|
||||
p->pcost = 0.0;
|
||||
p->wakefulness = SLEEPY;
|
||||
p->could_prune_now_p = 0;
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
/*
|
||||
* destroy a plan
|
||||
*/
|
||||
void X(plan_destroy_internal)(plan *ego)
|
||||
{
|
||||
if (ego) {
|
||||
A(ego->wakefulness == SLEEPY);
|
||||
ego->adt->destroy(ego);
|
||||
X(ifree)(ego);
|
||||
}
|
||||
}
|
||||
|
||||
/* dummy destroy routine for plans with no local state */
|
||||
void X(plan_null_destroy)(plan *ego)
|
||||
{
|
||||
UNUSED(ego);
|
||||
/* nothing */
|
||||
}
|
||||
|
||||
/* Change the wakefulness of a plan through its adt awake hook and
   record the new state.  A null plan is ignored.  The assertion
   requires that exactly one of the old and new states is SLEEPY. */
void X(plan_awake)(plan *ego, enum wakefulness wakefulness)
{
     if (!ego)
          return;

     A(((wakefulness == SLEEPY) ^ (ego->wakefulness == SLEEPY)));

     ego->adt->awake(ego, wakefulness);
     ego->wakefulness = wakefulness;
}
|
||||
|
||||
1035
fftw-3.3.10/kernel/planner.c
Normal file
1035
fftw-3.3.10/kernel/planner.c
Normal file
File diff suppressed because it is too large
Load Diff
212
fftw-3.3.10/kernel/primes.c
Normal file
212
fftw-3.3.10/kernel/primes.c
Normal file
@@ -0,0 +1,212 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include "kernel/ifftw.h"
|
||||
|
||||
/***************************************************************************/
|
||||
|
||||
/* Rader's algorithm requires lots of modular arithmetic, and if we
|
||||
aren't careful we can have errors due to integer overflows. */
|
||||
|
||||
/* Compute (x * y) mod p, but watch out for integer overflows; we must
|
||||
have 0 <= {x, y} < p.
|
||||
|
||||
If overflow is common, this routine is somewhat slower than
|
||||
e.g. using 'long long' arithmetic. However, it has the advantage
|
||||
of working when INT is 64 bits, and is also faster when overflow is
|
||||
rare. FFTW calls this via the MULMOD macro, which further
|
||||
optimizes for the case of small integers.
|
||||
*/
|
||||
|
||||
#define ADD_MOD(x, y, p) ((x) >= (p) - (y)) ? ((x) + ((y) - (p))) : ((x) + (y))
|
||||
|
||||
INT X(safe_mulmod)(INT x, INT y, INT p)
|
||||
{
|
||||
INT r;
|
||||
|
||||
if (y > x)
|
||||
return X(safe_mulmod)(y, x, p);
|
||||
|
||||
A(0 <= y && x < p);
|
||||
|
||||
r = 0;
|
||||
while (y) {
|
||||
r = ADD_MOD(r, x*(y&1), p); y >>= 1;
|
||||
x = ADD_MOD(x, x, p);
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
/***************************************************************************/
|
||||
|
||||
/* Compute n^m mod p, where m >= 0 and p > 0. If we really cared, we
|
||||
could make this tail-recursive. */
|
||||
|
||||
INT X(power_mod)(INT n, INT m, INT p)
|
||||
{
|
||||
A(p > 0);
|
||||
if (m == 0)
|
||||
return 1;
|
||||
else if (m % 2 == 0) {
|
||||
INT x = X(power_mod)(n, m / 2, p);
|
||||
return MULMOD(x, x, p);
|
||||
}
|
||||
else
|
||||
return MULMOD(n, X(power_mod)(n, m - 1, p), p);
|
||||
}
|
||||
|
||||
/* the following two routines were contributed by Greg Dionne. */
|
||||
/* Store the distinct prime factors of n in primef[] (in increasing
   order) and return how many were stored.  n must be positive and
   even; the caller must provide room for every distinct factor.

   The trial-division bound is written as i <= n / i rather than
   i * i <= n so that it cannot overflow INT for large n. */
static INT get_prime_factors(INT n, INT *primef)
{
     INT i;
     INT size = 0;

     /* this routine is designed only for even n; n == 0 would never
        leave the halving loop below */
     A(n > 0 && n % 2 == 0);
     primef[size++] = (INT)2;
     do {
          n >>= 1;
     } while ((n & 1) == 0);

     if (n == 1)
          return size;

     for (i = 3; i <= n / i; i += 2)
          if (!(n % i)) {
               primef[size++] = i;
               do {
                    n /= i;
               } while (!(n % i));
          }

     if (n == 1)
          return size;

     /* whatever remains is itself prime */
     primef[size++] = n;
     return size;
}
|
||||
|
||||
/* Return the smallest g >= 2 such that g^((p-1)/q) mod p != 1 for
   every distinct prime factor q of p - 1 (returns 1 for p == 2).
   Candidates are tried in increasing order; a candidate is dropped as
   soon as one factor yields 1. */
INT X(find_generator)(INT p)
{
     INT g, k, nfactors;
     INT factors[16]; /* smallest number = 32589158477190044730 > 2^64 */
     INT pm1 = p - 1;

     if (p == 2)
          return 1;

     nfactors = get_prime_factors(pm1, factors);

     for (g = 2; ; ++g) {
          for (k = 0; k < nfactors; ++k)
               if (X(power_mod)(g, pm1 / factors[k], p) == 1)
                    break; /* g fails for this factor: next candidate */
          if (k == nfactors)
               return g;
     }
}
|
||||
|
||||
/* Return first prime divisor of n (It would be at best slightly faster to
|
||||
search a static table of primes; there are 6542 primes < 2^16.) */
|
||||
INT X(first_divisor)(INT n)
|
||||
{
|
||||
INT i;
|
||||
if (n <= 1)
|
||||
return n;
|
||||
if (n % 2 == 0)
|
||||
return 2;
|
||||
for (i = 3; i*i <= n; i += 2)
|
||||
if (n % i == 0)
|
||||
return i;
|
||||
return n;
|
||||
}
|
||||
|
||||
/* Nonzero iff n is prime, i.e. n > 1 and its first divisor is n. */
int X(is_prime)(INT n)
{
     if (n <= 1)
          return 0;
     return X(first_divisor)(n) == n;
}
|
||||
|
||||
/* Return the smallest prime that is >= n. */
INT X(next_prime)(INT n)
{
     for (; !X(is_prime)(n); ++n)
          ;
     return n;
}
|
||||
|
||||
/* Nonzero iff n is a product of powers of the numbers listed in the
   zero-terminated array `primes'.  Non-positive n never qualifies; in
   particular n == 0 is rejected up front, because 0 is divisible by
   everything and the division loop below would never terminate. */
int X(factors_into)(INT n, const INT *primes)
{
     if (n <= 0)
          return 0;

     for (; *primes != 0; ++primes)
          while ((n % *primes) == 0)
               n /= *primes;
     return (n == 1);
}
|
||||
|
||||
/* integer square root. Return floor(sqrt(N)) */
|
||||
INT X(isqrt)(INT n)
|
||||
{
|
||||
INT guess, iguess;
|
||||
|
||||
A(n >= 0);
|
||||
if (n == 0) return 0;
|
||||
|
||||
guess = n; iguess = 1;
|
||||
|
||||
do {
|
||||
guess = (guess + iguess) / 2;
|
||||
iguess = n / guess;
|
||||
} while (guess > iguess);
|
||||
|
||||
return guess;
|
||||
}
|
||||
|
||||
/* Return sqrt(n) if n is a perfect square, and 0 otherwise. */
static INT isqrt_maybe(INT n)
{
     INT root = X(isqrt)(n);

     if (root * root != n)
          return 0;
     return root;
}
|
||||
|
||||
#define divides(a, b) (((b) % (a)) == 0)
|
||||
INT X(choose_radix)(INT r, INT n)
|
||||
{
|
||||
if (r > 0) {
|
||||
if (divides(r, n)) return r;
|
||||
return 0;
|
||||
} else if (r == 0) {
|
||||
return X(first_divisor)(n);
|
||||
} else {
|
||||
/* r is negative. If n = (-r) * q^2, take q as the radix */
|
||||
r = 0 - r;
|
||||
return (n > r && divides(r, n)) ? isqrt_maybe(n / r) : 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* return A mod N, works for all A including A < 0 */
|
||||
INT X(modulo)(INT a, INT n)
|
||||
{
|
||||
A(n > 0);
|
||||
if (a >= 0)
|
||||
return a % n;
|
||||
else
|
||||
return (n - 1) - ((-(a + (INT)1)) % n);
|
||||
}
|
||||
|
||||
/* TRUE if N factors into small primes */
|
||||
int X(factors_into_small_primes)(INT n)
|
||||
{
|
||||
static const INT primes[] = { 2, 3, 5, 0 };
|
||||
return X(factors_into)(n, primes);
|
||||
}
|
||||
244
fftw-3.3.10/kernel/print.c
Normal file
244
fftw-3.3.10/kernel/print.c
Normal file
@@ -0,0 +1,244 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include "kernel/ifftw.h"
|
||||
#include <stddef.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
|
||||
/* size of the scratch buffers in which putint/putulong build digits */
#define BSZ 64
|
||||
|
||||
/* Send every character of the NUL-terminated string s to the printer. */
static void myputs(printer *p, const char *s)
{
     for (; *s; ++s)
          p->putchr(p, *s);
}
|
||||
|
||||
/* Emit a newline followed by p->indent spaces. */
static void newline(printer *p)
{
     int col = 0;

     p->putchr(p, '\n');
     while (col < p->indent) {
          p->putchr(p, ' ');
          ++col;
     }
}
|
||||
|
||||
static const char *digits = "0123456789abcdef";
|
||||
|
||||
static void putint(printer *p, INT i)
|
||||
{
|
||||
char buf[BSZ];
|
||||
char *f = buf;
|
||||
|
||||
if (i < 0) {
|
||||
p->putchr(p, '-');
|
||||
i = -i;
|
||||
}
|
||||
|
||||
do {
|
||||
*f++ = digits[i % 10];
|
||||
i /= 10;
|
||||
} while (i);
|
||||
|
||||
do {
|
||||
p->putchr(p, *--f);
|
||||
} while (f != buf);
|
||||
}
|
||||
|
||||
/* Print the unsigned value i in the given base (digits come from the
   `digits' table), left-padded with '0' to at least `width'
   characters. */
static void putulong(printer *p, unsigned long i, unsigned base, int width)
{
     char scratch[BSZ];
     int ndigits = 0;

     /* collect digits, least significant first; always at least one */
     do {
          scratch[ndigits++] = digits[i % base];
          i /= base;
     } while (i != 0);

     /* zero padding */
     for (; width > ndigits; --width)
          p->putchr(p, '0');

     /* digits, most significant first */
     while (ndigits > 0)
          p->putchr(p, scratch[--ndigits]);
}
|
||||
|
||||
/* printf-like formatter.  Characters other than '%' are copied to the
   printer unchanged.  Conversions:

     %M  md5uint, low 32 bits as 8 hex digits
     %c  character (passed as int)
     %s  string, "(null)" for a null pointer
     %d  int
     %D  INT
     %v  INT vector length: prints "-x<n>" only when n > 1
     %o  INT option, used as %oNAME= : prints "/NAME=<n>" only when
         n != 0; the NAME= part of the format is always consumed
     %u  unsigned, decimal
     %x  unsigned, hexadecimal
     %(  increase the indent level, then start a new line
     %)  decrease the indent level
     %p  plan, printed through its adt ("(null)" if null); note the
         difference from C's %p
     %P  problem, printed through its adt ("(null)" if null)
     %T  tensor ("(null)" if null)
*/
static void vprint(printer *p, const char *format, va_list ap)
{
     const char *s = format;
     char c;
     INT ival;

     while ((c = *s++)) {
          if (c != '%') {
               /* ordinary character */
               p->putchr(p, c);
               continue;
          }

          switch ((c = *s++)) {
              case 'M': {
                   /* md5 value */
                   md5uint word = va_arg(ap, md5uint);
                   putulong(p, (unsigned long)(0xffffffffUL & word),
                            16u, 8);
                   break;
              }
              case 'c': {
                   int chr = va_arg(ap, int);
                   p->putchr(p, (char)chr);
                   break;
              }
              case 's': {
                   char *str = va_arg(ap, char *);
                   if (!str)
                        goto putnull;
                   myputs(p, str);
                   break;
              }
              case 'd': {
                   int num = va_arg(ap, int);
                   ival = (INT)num;
                   goto putival;
              }
              case 'D': {
                   ival = va_arg(ap, INT);
                   goto putival;
              }
              case 'v': {
                   /* print optional vector length */
                   ival = va_arg(ap, INT);
                   if (ival > 1) {
                        myputs(p, "-x");
                        goto putival;
                   }
                   break;
              }
              case 'o': {
                   /* integer option.  Usage: %oNAME= */
                   ival = va_arg(ap, INT);
                   if (ival)
                        p->putchr(p, '/');
                   /* consume NAME up to '=', echoing it only if set */
                   while ((c = *s++) != '=')
                        if (ival)
                             p->putchr(p, c);
                   if (ival) {
                        p->putchr(p, '=');
                        goto putival;
                   }
                   break;
              }
              case 'u': {
                   unsigned num = va_arg(ap, unsigned);
                   putulong(p, (unsigned long)num, 10u, 0);
                   break;
              }
              case 'x': {
                   unsigned num = va_arg(ap, unsigned);
                   putulong(p, (unsigned long)num, 16u, 0);
                   break;
              }
              case '(': {
                   /* newline, augment indent level */
                   p->indent += p->indent_incr;
                   newline(p);
                   break;
              }
              case ')': {
                   /* decrement indent level */
                   p->indent -= p->indent_incr;
                   break;
              }
              case 'p': { /* note difference from C's %p */
                   /* print plan */
                   plan *pln = va_arg(ap, plan *);
                   if (!pln)
                        goto putnull;
                   pln->adt->print(pln, p);
                   break;
              }
              case 'P': {
                   /* print problem */
                   problem *prb = va_arg(ap, problem *);
                   if (!prb)
                        goto putnull;
                   prb->adt->print(prb, p);
                   break;
              }
              case 'T': {
                   /* print tensor */
                   tensor *tns = va_arg(ap, tensor *);
                   if (!tns)
                        goto putnull;
                   X(tensor_print)(tns, p);
                   break;
              }
              default:
                   A(0 /* unknown format */);
                   break;

              /* shared tails, reached only through goto */
          putnull:
                   myputs(p, "(null)");
                   break;

          putival:
                   putint(p, ival);
                   break;
          }
     }
}
|
||||
|
||||
/* Variadic front end: collect the arguments and hand them to vprint. */
static void print(printer *p, const char *format, ...)
{
     va_list args;

     va_start(args, format);
     vprint(p, format, args);
     va_end(args);
}
|
||||
|
||||
printer *X(mkprinter)(size_t size,
|
||||
void (*putchr)(printer *p, char c),
|
||||
void (*cleanup)(printer *p))
|
||||
{
|
||||
printer *s = (printer *)MALLOC(size, OTHER);
|
||||
s->print = print;
|
||||
s->vprint = vprint;
|
||||
s->putchr = putchr;
|
||||
s->cleanup = cleanup;
|
||||
s->indent = 0;
|
||||
s->indent_incr = 2;
|
||||
return s;
|
||||
}
|
||||
|
||||
/* Run the printer's cleanup hook, if it has one, and free the printer. */
void X(printer_destroy)(printer *p)
{
     if (p->cleanup) {
          p->cleanup(p);
     }
     X(ifree)(p);
}
|
||||
78
fftw-3.3.10/kernel/problem.c
Normal file
78
fftw-3.3.10/kernel/problem.c
Normal file
@@ -0,0 +1,78 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include "kernel/ifftw.h"
|
||||
|
||||
/* constructor */
|
||||
problem *X(mkproblem)(size_t sz, const problem_adt *adt)
|
||||
{
|
||||
problem *p = (problem *)MALLOC(sz, PROBLEMS);
|
||||
|
||||
p->adt = adt;
|
||||
return p;
|
||||
}
|
||||
|
||||
/* destructor */
|
||||
void X(problem_destroy)(problem *ego)
|
||||
{
|
||||
if (ego)
|
||||
ego->adt->destroy(ego);
|
||||
}
|
||||
|
||||
/* management of unsolvable problems */
|
||||
static void unsolvable_destroy(problem *ego)
|
||||
{
|
||||
UNUSED(ego);
|
||||
}
|
||||
|
||||
static void unsolvable_hash(const problem *p, md5 *m)
|
||||
{
|
||||
UNUSED(p);
|
||||
X(md5puts)(m, "unsolvable");
|
||||
}
|
||||
|
||||
static void unsolvable_print(const problem *ego, printer *p)
|
||||
{
|
||||
UNUSED(ego);
|
||||
p->print(p, "(unsolvable)");
|
||||
}
|
||||
|
||||
static void unsolvable_zero(const problem *ego)
|
||||
{
|
||||
UNUSED(ego);
|
||||
}
|
||||
|
||||
static const problem_adt padt =
|
||||
{
|
||||
PROBLEM_UNSOLVABLE,
|
||||
unsolvable_hash,
|
||||
unsolvable_zero,
|
||||
unsolvable_print,
|
||||
unsolvable_destroy
|
||||
};
|
||||
|
||||
/* there is no point in malloc'ing this one */
|
||||
static problem the_unsolvable_problem = { &padt };
|
||||
|
||||
problem *X(mkproblem_unsolvable)(void)
|
||||
{
|
||||
return &the_unsolvable_problem;
|
||||
}
|
||||
68
fftw-3.3.10/kernel/rader.c
Normal file
68
fftw-3.3.10/kernel/rader.c
Normal file
@@ -0,0 +1,68 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
#include "kernel/ifftw.h"
|
||||
|
||||
/*
|
||||
common routines for Rader solvers
|
||||
*/
|
||||
|
||||
|
||||
/* shared twiddle and omega lists, keyed by two/three integers. */
|
||||
struct rader_tls {
|
||||
INT k1, k2, k3;
|
||||
R *W;
|
||||
int refcnt;
|
||||
rader_tl *cdr;
|
||||
};
|
||||
|
||||
/* Push a new entry for W, keyed by (k1, k2, k3), onto the front of
   the list *tl.  The entry starts with a reference count of 1. */
void X(rader_tl_insert)(INT k1, INT k2, INT k3, R *W, rader_tl **tl)
{
     rader_tl *node = (rader_tl *) MALLOC(sizeof(rader_tl), TWIDDLES);

     node->k1 = k1;
     node->k2 = k2;
     node->k3 = k3;
     node->W = W;
     node->refcnt = 1;

     /* link in at the head */
     node->cdr = *tl;
     *tl = node;
}
|
||||
|
||||
/* Look up the entry keyed by (k1, k2, k3) in list t.  On a hit the
   entry's reference count is incremented and its array is returned;
   on a miss the result is 0. */
R *X(rader_tl_find)(INT k1, INT k2, INT k3, rader_tl *t)
{
     for (; t; t = t->cdr) {
          if (t->k1 == k1 && t->k2 == k2 && t->k3 == k3) {
               ++t->refcnt;
               return t->W;
          }
     }
     return 0;
}
|
||||
|
||||
/* Drop one reference to the array W held in list *tl.  When the count
   reaches zero (or below) the entry is unlinked and both W and the
   entry are freed.  A null W, or a W not present in the list, is
   ignored. */
void X(rader_tl_delete)(R *W, rader_tl **tl)
{
     rader_tl **link;
     rader_tl *node;

     if (!W)
          return;

     /* `link' always points at the pointer that refers to `node' */
     link = tl;
     while ((node = *link) != 0 && node->W != W)
          link = &node->cdr;

     if (!node)
          return;

     if (--node->refcnt <= 0) {
          *link = node->cdr;
          X(ifree)(node->W);
          X(ifree)(node);
     }
}
|
||||
204
fftw-3.3.10/kernel/scan.c
Normal file
204
fftw-3.3.10/kernel/scan.c
Normal file
@@ -0,0 +1,204 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include "kernel/ifftw.h"
|
||||
#include <string.h>
|
||||
#include <stddef.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#ifdef USE_CTYPE
#include <ctype.h>
#else
/* Locale-independent replacements for the <ctype.h> classifiers used
   in this file.  On linux, the is* functions call a routine that gets
   the ctype map in the current locale; because that operation is
   expensive, the map is cached on a per-thread basis.  We do not want
   FFTW to depend on that machinery, so plain range tests are used
   instead.

   Note that each macro evaluates its argument twice, and that
   isspace() accepts every value from 0 through ' ' inclusive.
*/
#undef isspace
#define isspace(x) (0 <= (x) && (x) <= ' ')
#undef isdigit
#define isdigit(x) ('0' <= (x) && (x) <= '9')
#undef isupper
#define isupper(x) ('A' <= (x) && (x) <= 'Z')
#undef islower
#define islower(x) ('a' <= (x) && (x) <= 'z')
#endif
|
||||
|
||||
/* Return the next input character: the pushed-back one if there is
   any (clearing the push-back slot), otherwise a fresh character from
   the scanner's getchr hook. */
static int mygetc(scanner *sc)
{
     int pending = sc->ungotc;

     if (pending == EOF)
          return(sc->getchr(sc));

     sc->ungotc = EOF;
     return pending;
}

#define GETCHR(sc) mygetc(sc)
|
||||
|
||||
/* Push c back so that the next mygetc returns it.  There is a single
   push-back slot; any previously pushed-back character is overwritten. */
static void myungetc(scanner *sc, int c)
{
     sc->ungotc = c;
}

#define UNGETCHR(sc, c) myungetc(sc, c)
|
||||
|
||||
/* Skip whitespace; the first non-blank character (or EOF) is pushed
   back for the next read. */
static void eat_blanks(scanner *sc)
{
     int ch;

     do {
          ch = GETCHR(sc);
     } while (isspace(ch));

     UNGETCHR(sc, ch);
}
|
||||
|
||||
/* Read a token into s.  Reading stops at EOF, whitespace, '(' or ')',
   or once maxlen characters have been stored; the character that
   stopped the scan is pushed back.  The token is NUL-terminated, so s
   must have room for maxlen + 1 bytes. */
static void mygets(scanner *sc, char *s, int maxlen)
{
     char *limit;
     int ch;

     A(maxlen > 0);
     limit = s + maxlen;

     for (;;) {
          ch = GETCHR(sc);
          if (ch == EOF || isspace(ch))
               break;
          if (ch == ')' || ch == '(')
               break;
          if (s >= limit)
               break;
          *s++ = (char)(ch & 0xFF);
     }

     *s = 0;
     UNGETCHR(sc, ch);
}
|
||||
|
||||
/* Read an optionally signed integer in the given base (digits 0-9,
   then letters of either case for values 10 and up).  *ret is set to
   1 if at least one digit was consumed and to 0 otherwise.  The first
   character that is not part of the number is pushed back.

   Two precautions against malformed input:
     - a character whose digit value is >= base ends the number,
       instead of being silently folded into the result;
     - the value is accumulated in unsigned arithmetic, which wraps
       rather than overflowing a signed long.  (This also covers 8
       hex digits on platforms where long is 32 bits wide.) */
static long getlong(scanner *sc, int base, int *ret)
{
     int negative = 0, ch, digit, count;
     unsigned long acc = 0;

     ch = GETCHR(sc);
     if (ch == '-' || ch == '+') {
          negative = (ch == '-');
          ch = GETCHR(sc);
     }

     for (count = 0; ; ++count) {
          if (isdigit(ch))
               digit = ch - '0';
          else if (isupper(ch))
               digit = ch - ('A' - 10);
          else if (islower(ch))
               digit = ch - ('a' - 10);
          else
               break;

          if (digit >= base)
               break; /* not a digit in this base; ch is left intact */

          acc = acc * (unsigned long)base + (unsigned long)digit;
          ch = GETCHR(sc);
     }

     UNGETCHR(sc, ch);
     *ret = count > 0;

     if (negative)
          acc = 0UL - acc;
     return (long)acc;
}
|
||||
|
||||
/* vscan is mostly scanf-like, with our additional format specifiers,
|
||||
but with a few twists. It returns simply 0 or 1 indicating whether
|
||||
the match was successful. '(' and ')' in the format string match
|
||||
those characters preceded by any whitespace. Finally, if a
|
||||
character match fails, it will ungetchr() the last character back
|
||||
onto the stream. */
|
||||
static int vscan(scanner *sc, const char *format, va_list ap)
|
||||
{
|
||||
const char *s = format;
|
||||
char c;
|
||||
int ch = 0;
|
||||
int fmt_len;
|
||||
|
||||
while ((c = *s++)) {
|
||||
fmt_len = 0;
|
||||
switch (c) {
|
||||
case '%':
|
||||
getformat:
|
||||
switch ((c = *s++)) {
|
||||
case 's': {
|
||||
char *x = va_arg(ap, char *);
|
||||
mygets(sc, x, fmt_len);
|
||||
break;
|
||||
}
|
||||
case 'd': {
|
||||
int *x = va_arg(ap, int *);
|
||||
*x = (int) getlong(sc, 10, &ch);
|
||||
if (!ch) return 0;
|
||||
break;
|
||||
}
|
||||
case 'x': {
|
||||
int *x = va_arg(ap, int *);
|
||||
*x = (int) getlong(sc, 16, &ch);
|
||||
if (!ch) return 0;
|
||||
break;
|
||||
}
|
||||
case 'M': {
|
||||
md5uint *x = va_arg(ap, md5uint *);
|
||||
*x = (md5uint)
|
||||
(0xFFFFFFFF & getlong(sc, 16, &ch));
|
||||
if (!ch) return 0;
|
||||
break;
|
||||
}
|
||||
case '*': {
|
||||
if ((fmt_len = va_arg(ap, int)) <= 0) return 0;
|
||||
goto getformat;
|
||||
}
|
||||
default:
|
||||
A(0 /* unknown format */);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
if (isspace(c) || c == '(' || c == ')')
|
||||
eat_blanks(sc);
|
||||
if (!isspace(c) && (ch = GETCHR(sc)) != c) {
|
||||
UNGETCHR(sc, ch);
|
||||
return 0;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Variadic front end for vscan; returns vscan's result. */
static int scan(scanner *sc, const char *format, ...)
{
     va_list args;
     int matched;

     va_start(args, format);
     matched = vscan(sc, format, args);
     va_end(args);

     return matched;
}
|
||||
|
||||
/* Allocate a scanner of `size' bytes (tagged OTHER), install the
   generic scan/vscan entry points and the caller's character source,
   and start with an empty push-back slot. */
scanner *X(mkscanner)(size_t size, int (*getchr)(scanner *sc))
{
     scanner *scn = (scanner *)MALLOC(size, OTHER);

     scn->getchr = getchr;
     scn->ungotc = EOF;
     scn->scan = scan;
     scn->vscan = vscan;
     return scn;
}
|
||||
|
||||
/* Release a scanner obtained from X(mkscanner). */
void X(scanner_destroy)(scanner *sc)
{
     X(ifree)(sc);
}
|
||||
50
fftw-3.3.10/kernel/solver.c
Normal file
50
fftw-3.3.10/kernel/solver.c
Normal file
@@ -0,0 +1,50 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include "kernel/ifftw.h"
|
||||
|
||||
solver *X(mksolver)(size_t size, const solver_adt *adt)
|
||||
{
|
||||
solver *s = (solver *)MALLOC(size, SOLVERS);
|
||||
|
||||
s->adt = adt;
|
||||
s->refcnt = 0;
|
||||
return s;
|
||||
}
|
||||
|
||||
/* Take one more reference to the solver. */
void X(solver_use)(solver *ego)
{
     ego->refcnt += 1;
}
|
||||
|
||||
/* Drop one reference to the solver.  When the count reaches exactly
   zero, the optional adt destroy hook is run and the solver is freed. */
void X(solver_destroy)(solver *ego)
{
     if ((--ego->refcnt) != 0)
          return; /* still in use */

     if (ego->adt->destroy)
          ego->adt->destroy(ego);
     X(ifree)(ego);
}
|
||||
|
||||
/* Register solver s with the planner, via the planner's own adt. */
void X(solver_register)(planner *plnr, solver *s)
{
     plnr->adt->register_solver(plnr, s);
}
|
||||
33
fftw-3.3.10/kernel/solvtab.c
Normal file
33
fftw-3.3.10/kernel/solvtab.c
Normal file
@@ -0,0 +1,33 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include "kernel/ifftw.h"
|
||||
|
||||
/* Run every registration routine in the table, which ends at the
   first entry with a null reg_nam.  Before each call the planner's
   current registration name is set to the entry's name and its
   registration id is reset to 0; the name is cleared at the end. */
void X(solvtab_exec)(const solvtab tbl, planner *p)
{
     while (tbl->reg_nam) {
          p->cur_reg_nam = tbl->reg_nam;
          p->cur_reg_id = 0;
          tbl->reg(p);
          ++tbl;
     }

     p->cur_reg_nam = 0;
}
|
||||
|
||||
45
fftw-3.3.10/kernel/stride.c
Normal file
45
fftw-3.3.10/kernel/stride.c
Normal file
@@ -0,0 +1,45 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
#include "kernel/ifftw.h"
|
||||
|
||||
/* An externally visible constant whose value is always zero.
   NOTE(review): presumably referenced from the headers so that the
   compiler cannot fold certain index expressions -- confirm in ifftw.h. */
const INT X(an_INT_guaranteed_to_be_zero) = 0;
|
||||
|
||||
#ifdef PRECOMPUTE_ARRAY_INDICES
|
||||
/* Build a table of n precomputed offsets, p[i] = s * i, allocated
   with the STRIDES tag.  Release it with X(stride_destroy).

   The loop counter has type INT, the same type as n, so the
   comparison i < n cannot overflow the counter when n does not fit
   in an int. */
stride X(mkstride)(INT n, INT s)
{
     INT i;
     INT *p;

     A(n >= 0);
     p = (INT *) MALLOC((size_t)n * sizeof(INT), STRIDES);

     for (i = 0; i < n; ++i)
          p[i] = s * i;

     return p;
}
|
||||
|
||||
/* Free an offset table created by X(mkstride). */
void X(stride_destroy)(stride p)
{
     X(ifree0)(p);
}
|
||||
|
||||
#endif
|
||||
123
fftw-3.3.10/kernel/tensor.c
Normal file
123
fftw-3.3.10/kernel/tensor.c
Normal file
@@ -0,0 +1,123 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include "kernel/ifftw.h"
|
||||
|
||||
tensor *X(mktensor)(int rnk)
|
||||
{
|
||||
tensor *x;
|
||||
|
||||
A(rnk >= 0);
|
||||
|
||||
#if defined(STRUCT_HACK_KR)
|
||||
if (FINITE_RNK(rnk) && rnk > 1)
|
||||
x = (tensor *)MALLOC(sizeof(tensor) + (unsigned)(rnk - 1) * sizeof(iodim),
|
||||
TENSORS);
|
||||
else
|
||||
x = (tensor *)MALLOC(sizeof(tensor), TENSORS);
|
||||
#elif defined(STRUCT_HACK_C99)
|
||||
if (FINITE_RNK(rnk))
|
||||
x = (tensor *)MALLOC(sizeof(tensor) + (unsigned)rnk * sizeof(iodim),
|
||||
TENSORS);
|
||||
else
|
||||
x = (tensor *)MALLOC(sizeof(tensor), TENSORS);
|
||||
#else
|
||||
x = (tensor *)MALLOC(sizeof(tensor), TENSORS);
|
||||
if (FINITE_RNK(rnk) && rnk > 0)
|
||||
x->dims = (iodim *)MALLOC(sizeof(iodim) * (unsigned)rnk, TENSORS);
|
||||
else
|
||||
x->dims = 0;
|
||||
#endif
|
||||
|
||||
x->rnk = rnk;
|
||||
return x;
|
||||
}
|
||||
|
||||
/* Free a tensor.  When neither struct-hack layout is in use, the dims
   array is released first via X(ifree0). */
void X(tensor_destroy)(tensor *sz)
{
#if defined(STRUCT_HACK_C99) || defined(STRUCT_HACK_KR)
     /* nothing separate to free for dims in these layouts */
#else
     X(ifree0)(sz->dims);
#endif
     X(ifree)(sz);
}
|
||||
|
||||
INT X(tensor_sz)(const tensor *sz)
|
||||
{
|
||||
int i;
|
||||
INT n = 1;
|
||||
|
||||
if (!FINITE_RNK(sz->rnk))
|
||||
return 0;
|
||||
|
||||
for (i = 0; i < sz->rnk; ++i)
|
||||
n *= sz->dims[i].n;
|
||||
return n;
|
||||
}
|
||||
|
||||
/* Feed the tensor into the md5 state p: first the rank, then, for a
   finite rank, the (n, is, os) triple of every dimension in order. */
void X(tensor_md5)(md5 *p, const tensor *t)
{
     int k;

     X(md5int)(p, t->rnk);
     if (!FINITE_RNK(t->rnk))
          return;

     for (k = 0; k < t->rnk; ++k) {
          X(md5INT)(p, t->dims[k].n);
          X(md5INT)(p, t->dims[k].is);
          X(md5INT)(p, t->dims[k].os);
     }
}
|
||||
|
||||
/* treat a (rank <= 1)-tensor as a rank-1 tensor, extracting
|
||||
appropriate n, is, and os components */
|
||||
int X(tensor_tornk1)(const tensor *t, INT *n, INT *is, INT *os)
|
||||
{
|
||||
A(t->rnk <= 1);
|
||||
if (t->rnk == 1) {
|
||||
const iodim *vd = t->dims;
|
||||
*n = vd[0].n;
|
||||
*is = vd[0].is;
|
||||
*os = vd[0].os;
|
||||
} else {
|
||||
*n = 1;
|
||||
*is = *os = 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Print the tensor through p->print: a parenthesized, space-separated
   list of "(n is os)" triples for a finite rank, or the literal
   "rank-minfty" otherwise. */
void X(tensor_print)(const tensor *x, printer *p)
{
     const char *sep = "";
     int i;

     if (!FINITE_RNK(x->rnk)) {
          p->print(p, "rank-minfty");
          return;
     }

     p->print(p, "(");
     for (i = 0; i < x->rnk; ++i) {
          p->print(p, "%s(%D %D %D)",
                   sep,
                   x->dims[i].n, x->dims[i].is, x->dims[i].os);
          sep = " ";   /* every triple after the first gets a separator */
     }
     p->print(p, ")");
}
|
||||
36
fftw-3.3.10/kernel/tensor1.c
Normal file
36
fftw-3.3.10/kernel/tensor1.c
Normal file
@@ -0,0 +1,36 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include "kernel/ifftw.h"
|
||||
|
||||
tensor *X(mktensor_0d)(void)
|
||||
{
|
||||
return X(mktensor(0));
|
||||
}
|
||||
|
||||
/* Create a rank-1 tensor whose single dimension is (n, is, os). */
tensor *X(mktensor_1d)(INT n, INT is, INT os)
{
     tensor *t = X(mktensor)(1);
     iodim *d = t->dims;

     d->n = n;
     d->is = is;
     d->os = os;
     return t;
}
|
||||
53
fftw-3.3.10/kernel/tensor2.c
Normal file
53
fftw-3.3.10/kernel/tensor2.c
Normal file
@@ -0,0 +1,53 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include "kernel/ifftw.h"
|
||||
|
||||
/* Create a rank-2 tensor with dimensions (n0, is0, os0) and
   (n1, is1, os1), in that order. */
tensor *X(mktensor_2d)(INT n0, INT is0, INT os0,
                       INT n1, INT is1, INT os1)
{
     tensor *t = X(mktensor)(2);
     iodim *d = t->dims;

     d[0].n = n0;  d[0].is = is0;  d[0].os = os0;
     d[1].n = n1;  d[1].is = is1;  d[1].os = os1;
     return t;
}
|
||||
|
||||
|
||||
/* Create a rank-3 tensor from three (n, is, os) triples, stored in
   argument order. */
tensor *X(mktensor_3d)(INT n0, INT is0, INT os0,
                       INT n1, INT is1, INT os1,
                       INT n2, INT is2, INT os2)
{
     tensor *t = X(mktensor)(3);
     iodim *d = t->dims;

     d[0].n = n0;  d[0].is = is0;  d[0].os = os0;
     d[1].n = n1;  d[1].is = is1;  d[1].os = os1;
     d[2].n = n2;  d[2].is = is2;  d[2].os = os2;
     return t;
}
|
||||
72
fftw-3.3.10/kernel/tensor3.c
Normal file
72
fftw-3.3.10/kernel/tensor3.c
Normal file
@@ -0,0 +1,72 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include "kernel/ifftw.h"
|
||||
|
||||
/* Currently, mktensor_4d and mktensor_5d are only used in the MPI
|
||||
routines, where very complicated transpositions are required.
|
||||
Therefore we split them into a separate source file. */
|
||||
|
||||
/* Create a rank-4 tensor from four (n, is, os) triples, stored in
   argument order. */
tensor *X(mktensor_4d)(INT n0, INT is0, INT os0,
                       INT n1, INT is1, INT os1,
                       INT n2, INT is2, INT os2,
                       INT n3, INT is3, INT os3)
{
     tensor *t = X(mktensor)(4);
     iodim *d = t->dims;

     d[0].n = n0;  d[0].is = is0;  d[0].os = os0;
     d[1].n = n1;  d[1].is = is1;  d[1].os = os1;
     d[2].n = n2;  d[2].is = is2;  d[2].os = os2;
     d[3].n = n3;  d[3].is = is3;  d[3].os = os3;
     return t;
}
|
||||
|
||||
/* Create a rank-5 tensor from five (n, is, os) triples, stored in
   argument order. */
tensor *X(mktensor_5d)(INT n0, INT is0, INT os0,
                       INT n1, INT is1, INT os1,
                       INT n2, INT is2, INT os2,
                       INT n3, INT is3, INT os3,
                       INT n4, INT is4, INT os4)
{
     tensor *t = X(mktensor)(5);
     iodim *d = t->dims;

     d[0].n = n0;  d[0].is = is0;  d[0].os = os0;
     d[1].n = n1;  d[1].is = is1;  d[1].os = os1;
     d[2].n = n2;  d[2].is = is2;  d[2].os = os2;
     d[3].n = n3;  d[3].is = is3;  d[3].os = os3;
     d[4].n = n4;  d[4].is = is4;  d[4].os = os4;
     return t;
}
|
||||
104
fftw-3.3.10/kernel/tensor4.c
Normal file
104
fftw-3.3.10/kernel/tensor4.c
Normal file
@@ -0,0 +1,104 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include "kernel/ifftw.h"
|
||||
|
||||
INT X(tensor_max_index)(const tensor *sz)
|
||||
{
|
||||
int i;
|
||||
INT ni = 0, no = 0;
|
||||
|
||||
A(FINITE_RNK(sz->rnk));
|
||||
for (i = 0; i < sz->rnk; ++i) {
|
||||
const iodim *p = sz->dims + i;
|
||||
ni += (p->n - 1) * X(iabs)(p->is);
|
||||
no += (p->n - 1) * X(iabs)(p->os);
|
||||
}
|
||||
return X(imax)(ni, no);
|
||||
}
|
||||
|
||||
#define tensor_min_xstride(sz, xs) { \
|
||||
A(FINITE_RNK(sz->rnk)); \
|
||||
if (sz->rnk == 0) return 0; \
|
||||
else { \
|
||||
int i; \
|
||||
INT s = X(iabs)(sz->dims[0].xs); \
|
||||
for (i = 1; i < sz->rnk; ++i) \
|
||||
s = X(imin)(s, X(iabs)(sz->dims[i].xs)); \
|
||||
return s; \
|
||||
} \
|
||||
}
|
||||
|
||||
INT X(tensor_min_istride)(const tensor *sz) tensor_min_xstride(sz, is)
|
||||
INT X(tensor_min_ostride)(const tensor *sz) tensor_min_xstride(sz, os)
|
||||
|
||||
INT X(tensor_min_stride)(const tensor *sz)
|
||||
{
|
||||
return X(imin)(X(tensor_min_istride)(sz), X(tensor_min_ostride)(sz));
|
||||
}
|
||||
|
||||
int X(tensor_inplace_strides)(const tensor *sz)
|
||||
{
|
||||
int i;
|
||||
A(FINITE_RNK(sz->rnk));
|
||||
for (i = 0; i < sz->rnk; ++i) {
|
||||
const iodim *p = sz->dims + i;
|
||||
if (p->is != p->os)
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
int X(tensor_inplace_strides2)(const tensor *a, const tensor *b)
|
||||
{
|
||||
return X(tensor_inplace_strides(a)) && X(tensor_inplace_strides(b));
|
||||
}
|
||||
|
||||
/* return true (1) iff *any* strides of sz decrease when we
|
||||
tensor_inplace_copy(sz, k). */
|
||||
static int tensor_strides_decrease(const tensor *sz, inplace_kind k)
|
||||
{
|
||||
if (FINITE_RNK(sz->rnk)) {
|
||||
int i;
|
||||
for (i = 0; i < sz->rnk; ++i)
|
||||
if ((sz->dims[i].os - sz->dims[i].is)
|
||||
* (k == INPLACE_OS ? (INT)1 : (INT)-1) < 0)
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Return true (1) iff *any* strides of sz decrease when we
|
||||
tensor_inplace_copy(k) *or* if *all* strides of sz are unchanged
|
||||
but *any* strides of vecsz decrease. This is used in indirect.c
|
||||
to determine whether to use INPLACE_IS or INPLACE_OS.
|
||||
|
||||
Note: X(tensor_strides_decrease)(sz, vecsz, INPLACE_IS)
|
||||
|| X(tensor_strides_decrease)(sz, vecsz, INPLACE_OS)
|
||||
|| X(tensor_inplace_strides2)(p->sz, p->vecsz)
|
||||
must always be true. */
|
||||
int X(tensor_strides_decrease)(const tensor *sz, const tensor *vecsz,
|
||||
inplace_kind k)
|
||||
{
|
||||
return(tensor_strides_decrease(sz, k)
|
||||
|| (X(tensor_inplace_strides)(sz)
|
||||
&& tensor_strides_decrease(vecsz, k)));
|
||||
}
|
||||
92
fftw-3.3.10/kernel/tensor5.c
Normal file
92
fftw-3.3.10/kernel/tensor5.c
Normal file
@@ -0,0 +1,92 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include "kernel/ifftw.h"
|
||||
|
||||
/* Copy rnk dimensions from src to dst, element by element.  Does
   nothing when rnk is not finite. */
static void dimcpy(iodim *dst, const iodim *src, int rnk)
{
     int k = 0;

     if (!FINITE_RNK(rnk))
          return;

     while (k < rnk) {
          dst[k] = src[k];
          ++k;
     }
}
|
||||
|
||||
tensor *X(tensor_copy)(const tensor *sz)
|
||||
{
|
||||
tensor *x = X(mktensor)(sz->rnk);
|
||||
dimcpy(x->dims, sz->dims, sz->rnk);
|
||||
return x;
|
||||
}
|
||||
|
||||
/* like X(tensor_copy), but makes strides in-place by
|
||||
setting os = is if k == INPLACE_IS or is = os if k == INPLACE_OS. */
|
||||
tensor *X(tensor_copy_inplace)(const tensor *sz, inplace_kind k)
|
||||
{
|
||||
tensor *x = X(tensor_copy)(sz);
|
||||
if (FINITE_RNK(x->rnk)) {
|
||||
int i;
|
||||
if (k == INPLACE_OS)
|
||||
for (i = 0; i < x->rnk; ++i)
|
||||
x->dims[i].is = x->dims[i].os;
|
||||
else
|
||||
for (i = 0; i < x->rnk; ++i)
|
||||
x->dims[i].os = x->dims[i].is;
|
||||
}
|
||||
return x;
|
||||
}
|
||||
|
||||
/* Like X(tensor_copy), but copy all of the dimensions *except*
|
||||
except_dim. */
|
||||
tensor *X(tensor_copy_except)(const tensor *sz, int except_dim)
|
||||
{
|
||||
tensor *x;
|
||||
|
||||
A(FINITE_RNK(sz->rnk) && sz->rnk >= 1 && except_dim < sz->rnk);
|
||||
x = X(mktensor)(sz->rnk - 1);
|
||||
dimcpy(x->dims, sz->dims, except_dim);
|
||||
dimcpy(x->dims + except_dim, sz->dims + except_dim + 1,
|
||||
x->rnk - except_dim);
|
||||
return x;
|
||||
}
|
||||
|
||||
/* Like X(tensor_copy), but copy only rnk dimensions starting
|
||||
with start_dim. */
|
||||
tensor *X(tensor_copy_sub)(const tensor *sz, int start_dim, int rnk)
|
||||
{
|
||||
tensor *x;
|
||||
|
||||
A(FINITE_RNK(sz->rnk) && start_dim + rnk <= sz->rnk);
|
||||
x = X(mktensor)(rnk);
|
||||
dimcpy(x->dims, sz->dims + start_dim, rnk);
|
||||
return x;
|
||||
}
|
||||
|
||||
tensor *X(tensor_append)(const tensor *a, const tensor *b)
|
||||
{
|
||||
if (!FINITE_RNK(a->rnk) || !FINITE_RNK(b->rnk)) {
|
||||
return X(mktensor)(RNK_MINFTY);
|
||||
} else {
|
||||
tensor *x = X(mktensor)(a->rnk + b->rnk);
|
||||
dimcpy(x->dims, a->dims, a->rnk);
|
||||
dimcpy(x->dims + a->rnk, b->dims, b->rnk);
|
||||
return x;
|
||||
}
|
||||
}
|
||||
215
fftw-3.3.10/kernel/tensor7.c
Normal file
215
fftw-3.3.10/kernel/tensor7.c
Normal file
@@ -0,0 +1,215 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include "kernel/ifftw.h"
|
||||
|
||||
/* Sign of x: -1, 0 or 1. */
static int signof(INT x)
{
     return (x > 0) - (x < 0);
}
|
||||
|
||||
/* total order among iodim's */
|
||||
int X(dimcmp)(const iodim *a, const iodim *b)
|
||||
{
|
||||
INT sai = X(iabs)(a->is), sbi = X(iabs)(b->is);
|
||||
INT sao = X(iabs)(a->os), sbo = X(iabs)(b->os);
|
||||
INT sam = X(imin)(sai, sao), sbm = X(imin)(sbi, sbo);
|
||||
|
||||
/* in descending order of min{istride, ostride} */
|
||||
if (sam != sbm)
|
||||
return signof(sbm - sam);
|
||||
|
||||
/* in case of a tie, in descending order of istride */
|
||||
if (sbi != sai)
|
||||
return signof(sbi - sai);
|
||||
|
||||
/* in case of a tie, in descending order of ostride */
|
||||
if (sbo != sao)
|
||||
return signof(sbo - sao);
|
||||
|
||||
/* in case of a tie, in ascending order of n */
|
||||
return signof(a->n - b->n);
|
||||
}
|
||||
|
||||
static void canonicalize(tensor *x)
|
||||
{
|
||||
if (x->rnk > 1) {
|
||||
qsort(x->dims, (unsigned)x->rnk, sizeof(iodim),
|
||||
(int (*)(const void *, const void *))X(dimcmp));
|
||||
}
|
||||
}
|
||||
|
||||
static int compare_by_istride(const iodim *a, const iodim *b)
|
||||
{
|
||||
INT sai = X(iabs)(a->is), sbi = X(iabs)(b->is);
|
||||
|
||||
/* in descending order of istride */
|
||||
return signof(sbi - sai);
|
||||
}
|
||||
|
||||
static tensor *really_compress(const tensor *sz)
|
||||
{
|
||||
int i, rnk;
|
||||
tensor *x;
|
||||
|
||||
A(FINITE_RNK(sz->rnk));
|
||||
for (i = rnk = 0; i < sz->rnk; ++i) {
|
||||
A(sz->dims[i].n > 0);
|
||||
if (sz->dims[i].n != 1)
|
||||
++rnk;
|
||||
}
|
||||
|
||||
x = X(mktensor)(rnk);
|
||||
for (i = rnk = 0; i < sz->rnk; ++i) {
|
||||
if (sz->dims[i].n != 1)
|
||||
x->dims[rnk++] = sz->dims[i];
|
||||
}
|
||||
return x;
|
||||
}
|
||||
|
||||
/* Like tensor_copy, but eliminate n == 1 dimensions, which
|
||||
never affect any transform or transform vector.
|
||||
|
||||
Also, we sort the tensor into a canonical order of decreasing
|
||||
strides (see X(dimcmp) for an exact definition). In general,
|
||||
processing a loop/array in order of decreasing stride will improve
|
||||
locality. Both forward and backwards traversal of the tensor are
|
||||
considered e.g. by vrank-geq1, so sorting in increasing
|
||||
vs. decreasing order is not really important. */
|
||||
tensor *X(tensor_compress)(const tensor *sz)
|
||||
{
|
||||
tensor *x = really_compress(sz);
|
||||
canonicalize(x);
|
||||
return x;
|
||||
}
|
||||
|
||||
/* Return whether the strides of a and b are such that they form an
|
||||
effective contiguous 1d array. Assumes that a.is >= b.is. */
|
||||
static int strides_contig(iodim *a, iodim *b)
|
||||
{
|
||||
return (a->is == b->is * b->n && a->os == b->os * b->n);
|
||||
}
|
||||
|
||||
/* Like tensor_compress, but also compress into one dimension any
|
||||
group of dimensions that form a contiguous block of indices with
|
||||
some stride. (This can safely be done for transform vector sizes.) */
|
||||
tensor *X(tensor_compress_contiguous)(const tensor *sz)
|
||||
{
|
||||
int i, rnk;
|
||||
tensor *sz2, *x;
|
||||
|
||||
if (X(tensor_sz)(sz) == 0)
|
||||
return X(mktensor)(RNK_MINFTY);
|
||||
|
||||
sz2 = really_compress(sz);
|
||||
A(FINITE_RNK(sz2->rnk));
|
||||
|
||||
if (sz2->rnk <= 1) { /* nothing to compress. */
|
||||
if (0) {
|
||||
/* this call is redundant, because "sz->rnk <= 1" implies
|
||||
that the tensor is already canonical, but I am writing
|
||||
it explicitly because "logically" we need to canonicalize
|
||||
the tensor before returning. */
|
||||
canonicalize(sz2);
|
||||
}
|
||||
return sz2;
|
||||
}
|
||||
|
||||
/* sort in descending order of |istride|, so that compressible
|
||||
dimensions appear contigously */
|
||||
qsort(sz2->dims, (unsigned)sz2->rnk, sizeof(iodim),
|
||||
(int (*)(const void *, const void *))compare_by_istride);
|
||||
|
||||
/* compute what the rank will be after compression */
|
||||
for (i = rnk = 1; i < sz2->rnk; ++i)
|
||||
if (!strides_contig(sz2->dims + i - 1, sz2->dims + i))
|
||||
++rnk;
|
||||
|
||||
/* merge adjacent dimensions whenever possible */
|
||||
x = X(mktensor)(rnk);
|
||||
x->dims[0] = sz2->dims[0];
|
||||
for (i = rnk = 1; i < sz2->rnk; ++i) {
|
||||
if (strides_contig(sz2->dims + i - 1, sz2->dims + i)) {
|
||||
x->dims[rnk - 1].n *= sz2->dims[i].n;
|
||||
x->dims[rnk - 1].is = sz2->dims[i].is;
|
||||
x->dims[rnk - 1].os = sz2->dims[i].os;
|
||||
} else {
|
||||
A(rnk < x->rnk);
|
||||
x->dims[rnk++] = sz2->dims[i];
|
||||
}
|
||||
}
|
||||
|
||||
X(tensor_destroy)(sz2);
|
||||
|
||||
/* reduce to canonical form */
|
||||
canonicalize(x);
|
||||
return x;
|
||||
}
|
||||
|
||||
/* The inverse of X(tensor_append): splits the sz tensor into
|
||||
tensor a followed by tensor b, where a's rank is arnk. */
|
||||
void X(tensor_split)(const tensor *sz, tensor **a, int arnk, tensor **b)
|
||||
{
|
||||
A(FINITE_RNK(sz->rnk) && FINITE_RNK(arnk));
|
||||
|
||||
*a = X(tensor_copy_sub)(sz, 0, arnk);
|
||||
*b = X(tensor_copy_sub)(sz, arnk, sz->rnk - arnk);
|
||||
}
|
||||
|
||||
/* TRUE if the two tensors are equal */
|
||||
int X(tensor_equal)(const tensor *a, const tensor *b)
|
||||
{
|
||||
if (a->rnk != b->rnk)
|
||||
return 0;
|
||||
|
||||
if (FINITE_RNK(a->rnk)) {
|
||||
int i;
|
||||
for (i = 0; i < a->rnk; ++i)
|
||||
if (0
|
||||
|| a->dims[i].n != b->dims[i].n
|
||||
|| a->dims[i].is != b->dims[i].is
|
||||
|| a->dims[i].os != b->dims[i].os
|
||||
)
|
||||
return 0;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* TRUE if the sets of input and output locations described by
|
||||
(append sz vecsz) are the same */
|
||||
int X(tensor_inplace_locations)(const tensor *sz, const tensor *vecsz)
|
||||
{
|
||||
tensor *t = X(tensor_append)(sz, vecsz);
|
||||
tensor *ti = X(tensor_copy_inplace)(t, INPLACE_IS);
|
||||
tensor *to = X(tensor_copy_inplace)(t, INPLACE_OS);
|
||||
tensor *tic = X(tensor_compress_contiguous)(ti);
|
||||
tensor *toc = X(tensor_compress_contiguous)(to);
|
||||
|
||||
int retval = X(tensor_equal)(tic, toc);
|
||||
|
||||
X(tensor_destroy)(t);
|
||||
X(tensor_destroy4)(ti, to, tic, toc);
|
||||
|
||||
return retval;
|
||||
}
|
||||
34
fftw-3.3.10/kernel/tensor8.c
Normal file
34
fftw-3.3.10/kernel/tensor8.c
Normal file
@@ -0,0 +1,34 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include "kernel/ifftw.h"
|
||||
|
||||
/* Destroy two tensors, a first and then b. */
void X(tensor_destroy2)(tensor *a, tensor *b)
{
     X(tensor_destroy)(a);
     X(tensor_destroy)(b);
}
|
||||
|
||||
/* Destroy four tensors, in the order a, b, c, d. */
void X(tensor_destroy4)(tensor *a, tensor *b, tensor *c, tensor *d)
{
     X(tensor_destroy)(a);
     X(tensor_destroy)(b);
     X(tensor_destroy)(c);
     X(tensor_destroy)(d);
}
|
||||
36
fftw-3.3.10/kernel/tensor9.c
Normal file
36
fftw-3.3.10/kernel/tensor9.c
Normal file
@@ -0,0 +1,36 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include "kernel/ifftw.h"
|
||||
|
||||
int X(tensor_kosherp)(const tensor *x)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (x->rnk < 0) return 0;
|
||||
|
||||
if (FINITE_RNK(x->rnk)) {
|
||||
for (i = 0; i < x->rnk; ++i)
|
||||
if (x->dims[i].n < 0)
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
53
fftw-3.3.10/kernel/tile2d.c
Normal file
53
fftw-3.3.10/kernel/tile2d.c
Normal file
@@ -0,0 +1,53 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
/* out of place 2D copy routines */
|
||||
#include "kernel/ifftw.h"
|
||||
|
||||
void X(tile2d)(INT n0l, INT n0u, INT n1l, INT n1u, INT tilesz,
|
||||
void (*f)(INT n0l, INT n0u, INT n1l, INT n1u, void *args),
|
||||
void *args)
|
||||
{
|
||||
INT d0, d1;
|
||||
|
||||
A(tilesz > 0); /* infinite loops otherwise */
|
||||
|
||||
tail:
|
||||
d0 = n0u - n0l;
|
||||
d1 = n1u - n1l;
|
||||
|
||||
if (d0 >= d1 && d0 > tilesz) {
|
||||
INT n0m = (n0u + n0l) / 2;
|
||||
X(tile2d)(n0l, n0m, n1l, n1u, tilesz, f, args);
|
||||
n0l = n0m; goto tail;
|
||||
} else if (/* d1 >= d0 && */ d1 > tilesz) {
|
||||
INT n1m = (n1u + n1l) / 2;
|
||||
X(tile2d)(n0l, n0u, n1l, n1m, tilesz, f, args);
|
||||
n1l = n1m; goto tail;
|
||||
} else {
|
||||
f(n0l, n0u, n1l, n1u, args);
|
||||
}
|
||||
}
|
||||
|
||||
/* Tile edge length: the integer square root of CACHESIZE divided by
   (sizeof(R) * vl * how_many_tiles_in_cache). */
INT X(compute_tilesz)(INT vl, int how_many_tiles_in_cache)
{
     const INT divisor =
          ((INT)sizeof(R)) * vl * (INT)how_many_tiles_in_cache;

     return X(isqrt)(CACHESIZE / divisor);
}
|
||||
194
fftw-3.3.10/kernel/timer.c
Normal file
194
fftw-3.3.10/kernel/timer.c
Normal file
@@ -0,0 +1,194 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include "kernel/ifftw.h"
|
||||
|
||||
#ifdef HAVE_UNISTD_H
|
||||
# include <unistd.h>
|
||||
#endif
|
||||
|
||||
#ifndef WITH_SLOW_TIMER
|
||||
# include "cycle.h"
|
||||
#endif
|
||||
|
||||
#ifndef FFTW_TIME_LIMIT
|
||||
#define FFTW_TIME_LIMIT 2.0 /* don't run for more than two seconds */
|
||||
#endif
|
||||
|
||||
/* the following code is disabled for now, because it seems to
   require that we #include <windows.h> in ifftw.h to
   typedef LARGE_INTEGER crude_time, and this pulls in the whole
   Windows universe and leads to namespace conflicts (unless
   we did some hack like assuming sizeof(LARGE_INTEGER) == sizeof(long long)).
   gettimeofday is provided by MinGW, which we use to cross-compile
   FFTW for Windows, and this seems to work well enough */
|
||||
#if 0 && (defined(__WIN32__) || defined(_WIN32) || defined(_WIN64))
|
||||
/* (disabled Windows variant) Read the performance counter. */
crude_time X(get_crude_time)(void)
{
     crude_time now;

     QueryPerformanceCounter(&now);
     return now;
}
|
||||
|
||||
/* (disabled Windows variant) Seconds elapsed since t0: the
   performance-counter difference divided by the counter frequency. */
static double elapsed_since(crude_time t0)
{
     crude_time now, freq;
     double delta;

     QueryPerformanceCounter(&now);
     QueryPerformanceFrequency(&freq);
     delta = (double) (now.QuadPart - t0.QuadPart);
     return delta / ((double) freq.QuadPart);
}
|
||||
|
||||
# define TIME_MIN_SEC 1.0e-2
|
||||
|
||||
#elif defined(HAVE_GETTIMEOFDAY)
|
||||
/* Current time as reported by gettimeofday. */
crude_time X(get_crude_time)(void)
{
     crude_time now;

     gettimeofday(&now, 0);
     return now;
}
|
||||
|
||||
#define elapsed_sec(t1,t0) ((double)(t1.tv_sec - t0.tv_sec) + \
|
||||
(double)(t1.tv_usec - t0.tv_usec) * 1.0E-6)
|
||||
|
||||
static double elapsed_since(crude_time t0)
|
||||
{
|
||||
crude_time t1;
|
||||
gettimeofday(&t1, 0);
|
||||
return elapsed_sec(t1, t0);
|
||||
}
|
||||
|
||||
# define TIME_MIN_SEC 1.0e-3
|
||||
|
||||
#else /* !HAVE_GETTIMEOFDAY */
|
||||
|
||||
/* Note that the only system where we are likely to need to fall back
|
||||
on the clock() function is Windows, for which CLOCKS_PER_SEC is 1000
|
||||
and thus the clock wraps once every 50 days. This should hopefully
|
||||
be longer than the time required to create any single plan! */
|
||||
crude_time X(get_crude_time)(void) { return clock(); }
|
||||
|
||||
#define elapsed_sec(t1,t0) ((double) ((t1) - (t0)) / CLOCKS_PER_SEC)
|
||||
|
||||
static double elapsed_since(crude_time t0)
|
||||
{
|
||||
return elapsed_sec(clock(), t0);
|
||||
}
|
||||
|
||||
# define TIME_MIN_SEC 2.0e-1 /* from fftw2 */
|
||||
|
||||
#endif /* !HAVE_GETTIMEOFDAY */
|
||||
|
||||
/* Seconds elapsed since t0.  If the planner has a cost_hook, the raw
   value is passed through it (with COST_MAX) and the hook's result
   is returned instead. */
double X(elapsed_since)(const planner *plnr, const problem *p, crude_time t0)
{
     double secs = elapsed_since(t0);

     if (!plnr->cost_hook)
          return secs;
     secs = plnr->cost_hook(p, secs, COST_MAX);
     return secs;
}
|
||||
|
||||
#ifdef WITH_SLOW_TIMER
|
||||
/* excruciatingly slow; only use this if there is no choice! */
|
||||
typedef crude_time ticks;
|
||||
# define getticks X(get_crude_time)
|
||||
# define elapsed(t1,t0) elapsed_sec(t1,t0)
|
||||
# define TIME_MIN TIME_MIN_SEC
|
||||
# define TIME_REPEAT 4 /* from fftw2 */
|
||||
# define HAVE_TICK_COUNTER
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_TICK_COUNTER
|
||||
|
||||
# ifndef TIME_MIN
|
||||
# define TIME_MIN 100.0
|
||||
# endif
|
||||
|
||||
# ifndef TIME_REPEAT
|
||||
# define TIME_REPEAT 8
|
||||
# endif
|
||||
|
||||
/* Time iter back-to-back calls of the plan's solve routine and
   return the elapsed tick count reported by elapsed(). */
static double measure(plan *pln, const problem *p, int iter)
{
     ticks start, stop;
     int remaining;

     start = getticks();
     for (remaining = iter; remaining > 0; --remaining)
          pln->adt->solve(pln, p);
     stop = getticks();
     return elapsed(stop, start);
}
|
||||
|
||||
|
||||
double X(measure_execution_time)(const planner *plnr,
|
||||
plan *pln, const problem *p)
|
||||
{
|
||||
int iter;
|
||||
int repeat;
|
||||
|
||||
X(plan_awake)(pln, AWAKE_ZERO);
|
||||
p->adt->zero(p);
|
||||
|
||||
start_over:
|
||||
for (iter = 1; iter; iter *= 2) {
|
||||
double tmin = 0;
|
||||
int first = 1;
|
||||
crude_time begin = X(get_crude_time)();
|
||||
|
||||
/* repeat the measurement TIME_REPEAT times */
|
||||
for (repeat = 0; repeat < TIME_REPEAT; ++repeat) {
|
||||
double t = measure(pln, p, iter);
|
||||
|
||||
if (plnr->cost_hook)
|
||||
t = plnr->cost_hook(p, t, COST_MAX);
|
||||
if (t < 0)
|
||||
goto start_over;
|
||||
|
||||
if (first || t < tmin)
|
||||
tmin = t;
|
||||
first = 0;
|
||||
|
||||
/* do not run for too long */
|
||||
if (X(elapsed_since)(plnr, p, begin) > FFTW_TIME_LIMIT)
|
||||
break;
|
||||
}
|
||||
|
||||
if (tmin >= TIME_MIN) {
|
||||
X(plan_awake)(pln, SLEEPY);
|
||||
return tmin / (double) iter;
|
||||
}
|
||||
}
|
||||
goto start_over; /* may happen if timer is screwed up */
|
||||
}
|
||||
|
||||
#else /* no cycle counter */
|
||||
|
||||
double X(measure_execution_time)(const planner *plnr,
|
||||
plan *pln, const problem *p)
|
||||
{
|
||||
UNUSED(plnr);
|
||||
UNUSED(p);
|
||||
UNUSED(pln);
|
||||
return -1.0;
|
||||
}
|
||||
|
||||
#endif
|
||||
191
fftw-3.3.10/kernel/transpose.c
Normal file
191
fftw-3.3.10/kernel/transpose.c
Normal file
@@ -0,0 +1,191 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
#include "kernel/ifftw.h"
|
||||
|
||||
/* in place square transposition, iterative */
|
||||
void X(transpose)(R *I, INT n, INT s0, INT s1, INT vl)
|
||||
{
|
||||
INT i0, i1, v;
|
||||
|
||||
switch (vl) {
|
||||
case 1:
|
||||
for (i1 = 1; i1 < n; ++i1) {
|
||||
for (i0 = 0; i0 < i1; ++i0) {
|
||||
R x0 = I[i1 * s0 + i0 * s1];
|
||||
R y0 = I[i1 * s1 + i0 * s0];
|
||||
I[i1 * s1 + i0 * s0] = x0;
|
||||
I[i1 * s0 + i0 * s1] = y0;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
for (i1 = 1; i1 < n; ++i1) {
|
||||
for (i0 = 0; i0 < i1; ++i0) {
|
||||
R x0 = I[i1 * s0 + i0 * s1];
|
||||
R x1 = I[i1 * s0 + i0 * s1 + 1];
|
||||
R y0 = I[i1 * s1 + i0 * s0];
|
||||
R y1 = I[i1 * s1 + i0 * s0 + 1];
|
||||
I[i1 * s1 + i0 * s0] = x0;
|
||||
I[i1 * s1 + i0 * s0 + 1] = x1;
|
||||
I[i1 * s0 + i0 * s1] = y0;
|
||||
I[i1 * s0 + i0 * s1 + 1] = y1;
|
||||
}
|
||||
}
|
||||
break;
|
||||
default:
|
||||
for (i1 = 1; i1 < n; ++i1) {
|
||||
for (i0 = 0; i0 < i1; ++i0) {
|
||||
for (v = 0; v < vl; ++v) {
|
||||
R x0 = I[i1 * s0 + i0 * s1 + v];
|
||||
R y0 = I[i1 * s1 + i0 * s0 + v];
|
||||
I[i1 * s1 + i0 * s0 + v] = x0;
|
||||
I[i1 * s0 + i0 * s1 + v] = y0;
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
struct transpose_closure {
|
||||
R *I;
|
||||
INT s0, s1, vl, tilesz;
|
||||
R *buf0, *buf1;
|
||||
};
|
||||
|
||||
/*
 * Tile callback: for every i1 in [n1l, n1u) and i0 in [n0l, n0u),
 * exchange the element at i1 * s0 + i0 * s1 with the one at
 * i1 * s1 + i0 * s0, directly in the array k->I.  Elements are vl
 * consecutive R values; vl == 1 and vl == 2 have dedicated loops.
 */
static void dotile(INT n0l, INT n0u, INT n1l, INT n1u, void *args)
{
     struct transpose_closure *k = (struct transpose_closure *)args;
     R *I = k->I;
     const INT s0 = k->s0, s1 = k->s1, vl = k->vl;
     INT i0, i1, v;

     if (vl == 1) {
          for (i1 = n1l; i1 < n1u; ++i1) {
               for (i0 = n0l; i0 < n0u; ++i0) {
                    const INT a = i1 * s0 + i0 * s1;
                    const INT b = i1 * s1 + i0 * s0;
                    R x0 = I[a];
                    R y0 = I[b];
                    I[b] = x0;
                    I[a] = y0;
               }
          }
     } else if (vl == 2) {
          for (i1 = n1l; i1 < n1u; ++i1) {
               for (i0 = n0l; i0 < n0u; ++i0) {
                    const INT a = i1 * s0 + i0 * s1;
                    const INT b = i1 * s1 + i0 * s0;
                    R x0 = I[a];
                    R x1 = I[a + 1];
                    R y0 = I[b];
                    R y1 = I[b + 1];
                    I[b] = x0;
                    I[b + 1] = x1;
                    I[a] = y0;
                    I[a + 1] = y1;
               }
          }
     } else {
          for (i1 = n1l; i1 < n1u; ++i1) {
               for (i0 = n0l; i0 < n0u; ++i0) {
                    const INT a = i1 * s0 + i0 * s1;
                    const INT b = i1 * s1 + i0 * s0;
                    for (v = 0; v < vl; ++v) {
                         R x0 = I[a + v];
                         R y0 = I[b + v];
                         I[b + v] = x0;
                         I[a + v] = y0;
                    }
               }
          }
     }
}
|
||||
|
||||
/*
 * Tile callback that goes through the two scratch buffers.
 *
 * Two cpy2d_ci calls take the tile at n0l * s0 + n1l * s1 and its
 * mirror at n0l * s1 + n1l * s0 as their first argument and buf0 /
 * buf1 as their second; two cpy2d_co calls then take buf1 / buf0 as
 * first argument and the tile / mirror as second, i.e. with the
 * buffers exchanged.  NOTE(review): cpy2d_ci / cpy2d_co are defined
 * elsewhere; presumably the first pointer is the source and the
 * second the destination.
 */
static void dotile_buf(INT n0l, INT n0u, INT n1l, INT n1u, void *args)
{
     struct transpose_closure *k = (struct transpose_closure *)args;
     const INT s0 = k->s0, s1 = k->s1, vl = k->vl;
     const INT d0 = n0u - n0l;          /* tile extent along n0 */
     const INT d1 = n1u - n1l;          /* tile extent along n1 */
     const INT bufstride = vl * d0;     /* buffer stride along n1 */
     R *tile = k->I + (n0l * s0 + n1l * s1);
     R *mirror = k->I + (n0l * s1 + n1l * s0);

     X(cpy2d_ci)(tile, k->buf0,
                 d0, s0, vl,
                 d1, s1, bufstride,
                 vl);
     X(cpy2d_ci)(mirror, k->buf1,
                 d0, s1, vl,
                 d1, s0, bufstride,
                 vl);
     X(cpy2d_co)(k->buf1, tile,
                 d0, vl, s0,
                 d1, bufstride, s1,
                 vl);
     X(cpy2d_co)(k->buf0, mirror,
                 d0, vl, s1,
                 d1, bufstride, s0,
                 vl);
}
|
||||
|
||||
static void transpose_rec(R *I, INT n,
|
||||
void (*f)(INT n0l, INT n0u, INT n1l, INT n1u,
|
||||
void *args),
|
||||
struct transpose_closure *k)
|
||||
{
|
||||
tail:
|
||||
if (n > 1) {
|
||||
INT n2 = n / 2;
|
||||
k->I = I;
|
||||
X(tile2d)(0, n2, n2, n, k->tilesz, f, k);
|
||||
transpose_rec(I, n2, f, k);
|
||||
I += n2 * (k->s0 + k->s1); n -= n2; goto tail;
|
||||
}
|
||||
}
|
||||
|
||||
/* In-place square transposition, tile by tile, with the tiles
   exchanged directly in the array (callback dotile). */
void X(transpose_tiled)(R *I, INT n, INT s0, INT s1, INT vl)
{
     struct transpose_closure k;

     k.buf0 = k.buf1 = 0; /* unused */
     k.vl = vl;
     k.s1 = s1;
     k.s0 = s0;

     /* two blocks must be in cache, to be swapped */
     k.tilesz = X(compute_tilesz)(vl, 2);

     transpose_rec(I, n, dotile, &k);
}
|
||||
|
||||
/* In-place square transposition, tile by tile, with every tile pair
   staged through two on-stack buffers (callback dotile_buf). */
void X(transpose_tiledbuf)(R *I, INT n, INT s0, INT s1, INT vl)
{
     /* Assume that the rows of I conflict into the same cache
        lines, and therefore we don't need to reserve cache space for
        the input.  If the rows don't conflict, there is no reason
        to use tiledbuf at all. */
     R buf0[CACHESIZE / (2 * sizeof(R))];
     R buf1[CACHESIZE / (2 * sizeof(R))];
     struct transpose_closure k;

     k.buf0 = buf0;
     k.buf1 = buf1;
     k.vl = vl;
     k.s0 = s0;
     k.s1 = s1;
     k.tilesz = X(compute_tilesz)(vl, 2);

     /* a tilesz x tilesz tile of vl-vectors must fit in each buffer */
     A(k.tilesz * k.tilesz * vl * sizeof(R) <= sizeof(buf0));
     A(k.tilesz * k.tilesz * vl * sizeof(R) <= sizeof(buf1));

     transpose_rec(I, n, dotile_buf, &k);
}
|
||||
|
||||
234
fftw-3.3.10/kernel/trig.c
Normal file
234
fftw-3.3.10/kernel/trig.c
Normal file
@@ -0,0 +1,234 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
/* trigonometric functions */
|
||||
#include "kernel/ifftw.h"
|
||||
#include <math.h>
|
||||
|
||||
/*
 * Select the sine/cosine routines and the literal suffix that match
 * the configured trigreal type:
 *   COS, SIN   functions taking and returning trigreal
 *   KTRIG(x)   spell the constant x in trigreal precision
 */
#if defined(TRIGREAL_IS_LONG_DOUBLE)
#  define COS cosl
#  define SIN sinl
#  define KTRIG(x) (x##L)
/* supply prototypes when the configuration says they are missing */
#  if defined(HAVE_DECL_SINL) && !HAVE_DECL_SINL
     extern long double sinl(long double x);
#  endif
#  if defined(HAVE_DECL_COSL) && !HAVE_DECL_COSL
     extern long double cosl(long double x);
#  endif
#elif defined(TRIGREAL_IS_QUAD)
#  define COS cosq
#  define SIN sinq
#  define KTRIG(x) (x##Q)
   extern __float128 sinq(__float128 x);
   extern __float128 cosq(__float128 x);
#else
#  define COS cos
#  define SIN sin
#  define KTRIG(x) (x)
#endif
|
||||
|
||||
static const trigreal K2PI =
|
||||
KTRIG(6.2831853071795864769252867665590057683943388);
|
||||
#define by2pi(m, n) ((K2PI * (m)) / (n))
|
||||
|
||||
/*
|
||||
* Improve accuracy by reducing x to range [0..1/8]
|
||||
* before multiplication by 2 * PI.
|
||||
*/
|
||||
|
||||
static void real_cexp(INT m, INT n, trigreal *out)
|
||||
{
|
||||
trigreal theta, c, s, t;
|
||||
unsigned octant = 0;
|
||||
INT quarter_n = n;
|
||||
|
||||
n += n; n += n;
|
||||
m += m; m += m;
|
||||
|
||||
if (m < 0) m += n;
|
||||
if (m > n - m) { m = n - m; octant |= 4; }
|
||||
if (m - quarter_n > 0) { m = m - quarter_n; octant |= 2; }
|
||||
if (m > quarter_n - m) { m = quarter_n - m; octant |= 1; }
|
||||
|
||||
theta = by2pi(m, n);
|
||||
c = COS(theta); s = SIN(theta);
|
||||
|
||||
if (octant & 1) { t = c; c = s; s = t; }
|
||||
if (octant & 2) { t = c; c = -s; s = t; }
|
||||
if (octant & 4) { s = -s; }
|
||||
|
||||
out[0] = c;
|
||||
out[1] = s;
|
||||
}
|
||||
|
||||
/* Number of times n can be divided by 4 (integer division) before it
   reaches zero; used as the bit shift that splits an index between
   the two twiddle tables. */
static INT choose_twshft(INT n)
{
     INT shift;

     for (shift = 0; n > 0; n /= 4)
          ++shift;

     return shift;
}
|
||||
|
||||
/*
 * Two-table lookup: a negative m is first shifted up by p->n; the low
 * bits of m (m & twmsk) select an entry of W0, the remaining bits
 * (m >> twshft) an entry of W1, and the complex product of the two
 * entries is stored in res[0] (real part) and res[1] (imaginary part).
 */
static void cexpl_sqrtn_table(triggen *p, INT m, trigreal *res)
{
     const trigreal *w0, *w1;

     if (m < 0)
          m += p->n;

     w0 = p->W0 + 2 * (m & p->twmsk);
     w1 = p->W1 + 2 * (m >> p->twshft);

     res[0] = w1[0] * w0[0] - w1[1] * w0[1];
     res[1] = w1[1] * w0[0] + w1[0] * w0[1];
}
|
||||
|
||||
/* multiply (xr, xi) by exp(FFT_SIGN * 2*pi*i*m/n) */
|
||||
static void rotate_sqrtn_table(triggen *p, INT m, R xr, R xi, R *res)
|
||||
{
|
||||
m += p->n * (m < 0);
|
||||
|
||||
{
|
||||
INT m0 = m & p->twmsk;
|
||||
INT m1 = m >> p->twshft;
|
||||
trigreal wr0 = p->W0[2 * m0];
|
||||
trigreal wi0 = p->W0[2 * m0 + 1];
|
||||
trigreal wr1 = p->W1[2 * m1];
|
||||
trigreal wi1 = p->W1[2 * m1 + 1];
|
||||
trigreal wr = wr1 * wr0 - wi1 * wi0;
|
||||
trigreal wi = wi1 * wr0 + wr1 * wi0;
|
||||
|
||||
#if FFT_SIGN == -1
|
||||
res[0] = xr * wr + xi * wi;
|
||||
res[1] = xi * wr - xr * wi;
|
||||
#else
|
||||
res[0] = xr * wr - xi * wi;
|
||||
res[1] = xi * wr + xr * wi;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
/* cexpl implementation that evaluates cosine and sine directly, via
   real_cexp, on every call */
static void cexpl_sincos(triggen *p, INT m, trigreal *res)
{
     const INT n = p->n;

     real_cexp(m, n, res);
}
|
||||
|
||||
/* cexp implementation installed for AWAKE_ZERO: ignores p and m and
   stores (0, 0) */
static void cexp_zero(triggen *p, INT m, R *res)
{
     UNUSED(p);
     UNUSED(m);

     res[1] = 0;
     res[0] = 0;
}
|
||||
|
||||
/* cexpl implementation installed for AWAKE_ZERO: ignores p and m and
   stores (0, 0) */
static void cexpl_zero(triggen *p, INT m, trigreal *res)
{
     UNUSED(p);
     UNUSED(m);

     res[1] = 0;
     res[0] = 0;
}
|
||||
|
||||
/* cexp in terms of cexpl: evaluate in trigreal precision, then
   convert both components to R */
static void cexp_generic(triggen *p, INT m, R *res)
{
     trigreal wide[2];

     p->cexpl(p, m, wide);

     res[0] = (R)wide[0];
     res[1] = (R)wide[1];
}
|
||||
|
||||
/* rotate in terms of cexpl: multiply (xr, xi) by the factor returned
   by p->cexpl, with its imaginary part scaled by FFT_SIGN */
static void rotate_generic(triggen *p, INT m, R xr, R xi, R *res)
{
     trigreal w[2];

     p->cexpl(p, m, w);

     {
          const trigreal wr = w[0];
          const trigreal wi = FFT_SIGN * w[1];

          res[0] = xr * wr - xi * wi;
          res[1] = xi * wr + xr * wi;
     }
}
|
||||
|
||||
/*
 * Allocate a trigonometric-function generator for period n.
 *
 * wakefulness selects the implementation:
 *   AWAKE_SQRTN_TABLE  two tables are precomputed with real_cexp:
 *                      W0[i] for 0 <= i < twradix holds the value for
 *                      index i, W1[i] the value for index i * twradix;
 *                      cexpl and rotate combine one entry of each.
 *   AWAKE_SINCOS       cexpl calls real_cexp on every request.
 *   AWAKE_ZERO         cexp and cexpl always produce (0, 0).
 *   SLEEPY             not a valid request (asserted).
 *
 * Members left unset by the chosen case get generic fallbacks: cexp
 * reuses cexpl directly when trigreal and R have the same size and
 * goes through cexp_generic otherwise; rotate defaults to
 * rotate_generic.
 *
 * The result is released with X(triggen_destroy).
 */
triggen *X(mktriggen)(enum wakefulness wakefulness, INT n)
{
     INT i, n0, n1;
     triggen *p = (triggen *)MALLOC(sizeof(*p), TWIDDLES);

     p->n = n;
     p->W0 = p->W1 = 0;
     p->cexp = 0;
     /* cleared like cexp and rotate, so that the SLEEPY path cannot
        copy an indeterminate pointer into p->cexp below */
     p->cexpl = 0;
     p->rotate = 0;

     switch (wakefulness) {
         case SLEEPY:
              A(0 /* can't happen */);
              break;

         case AWAKE_SQRTN_TABLE: {
              INT twshft = choose_twshft(n);

              p->twshft = twshft;
              p->twradix = ((INT)1) << twshft;
              p->twmsk = p->twradix - 1;

              n0 = p->twradix;
              n1 = (n + n0 - 1) / n0;   /* ceil(n / n0) */

              p->W0 = (trigreal *)MALLOC(n0 * 2 * sizeof(trigreal), TWIDDLES);
              p->W1 = (trigreal *)MALLOC(n1 * 2 * sizeof(trigreal), TWIDDLES);

              for (i = 0; i < n0; ++i)
                   real_cexp(i, n, p->W0 + 2 * i);

              for (i = 0; i < n1; ++i)
                   real_cexp(i * p->twradix, n, p->W1 + 2 * i);

              p->cexpl = cexpl_sqrtn_table;
              p->rotate = rotate_sqrtn_table;
              break;
         }

         case AWAKE_SINCOS:
              p->cexpl = cexpl_sincos;
              break;

         case AWAKE_ZERO:
              p->cexp = cexp_zero;
              p->cexpl = cexpl_zero;
              break;
     }

     if (!p->cexp) {
          if (sizeof(trigreal) == sizeof(R))
               p->cexp = (void (*)(triggen *, INT, R *))p->cexpl;
          else
               p->cexp = cexp_generic;
     }
     if (!p->rotate)
          p->rotate = rotate_generic;
     return p;
}
|
||||
|
||||
/* Release a generator made by X(mktriggen): both tables are freed
   with ifree0, then the triggen itself with ifree. */
void X(triggen_destroy)(triggen *p)
{
     trigreal *const table0 = p->W0;
     trigreal *const table1 = p->W1;

     X(ifree0)(table0);
     X(ifree0)(table1);
     X(ifree)(p);
}
|
||||
256
fftw-3.3.10/kernel/twiddle.c
Normal file
256
fftw-3.3.10/kernel/twiddle.c
Normal file
@@ -0,0 +1,256 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
/* Twiddle manipulation */
|
||||
|
||||
#include "kernel/ifftw.h"
|
||||
#include <math.h>
|
||||
|
||||
#define HASHSZ 109
|
||||
|
||||
/* hash table of known twiddle factors */
|
||||
static twid *twlist[HASHSZ];
|
||||
|
||||
static INT hash(INT n, INT r)
|
||||
{
|
||||
INT h = n * 17 + r;
|
||||
|
||||
if (h < 0) h = -h;
|
||||
|
||||
return (h % HASHSZ);
|
||||
}
|
||||
|
||||
static int equal_instr(const tw_instr *p, const tw_instr *q)
|
||||
{
|
||||
if (p == q)
|
||||
return 1;
|
||||
|
||||
for (;; ++p, ++q) {
|
||||
if (p->op != q->op)
|
||||
return 0;
|
||||
|
||||
switch (p->op) {
|
||||
case TW_NEXT:
|
||||
return (p->v == q->v); /* p->i is ignored */
|
||||
|
||||
case TW_FULL:
|
||||
case TW_HALF:
|
||||
if (p->v != q->v) return 0; /* p->i is ignored */
|
||||
break;
|
||||
|
||||
default:
|
||||
if (p->v != q->v || p->i != q->i) return 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
A(0 /* can't happen */);
|
||||
}
|
||||
|
||||
static int ok_twid(const twid *t,
|
||||
enum wakefulness wakefulness,
|
||||
const tw_instr *q, INT n, INT r, INT m)
|
||||
{
|
||||
return (wakefulness == t->wakefulness &&
|
||||
n == t->n &&
|
||||
r == t->r &&
|
||||
m <= t->m &&
|
||||
equal_instr(t->instr, q));
|
||||
}
|
||||
|
||||
static twid *lookup(enum wakefulness wakefulness,
|
||||
const tw_instr *q, INT n, INT r, INT m)
|
||||
{
|
||||
twid *p;
|
||||
|
||||
for (p = twlist[hash(n,r)];
|
||||
p && !ok_twid(p, wakefulness, q, n, r, m);
|
||||
p = p->cdr)
|
||||
;
|
||||
return p;
|
||||
}
|
||||
|
||||
/*
 * Count the R values that one pass over the instruction program p
 * produces for radix r: 2 * (r - 1) per TW_FULL, r - 1 per TW_HALF,
 * 2 per TW_CEXP and 1 per TW_COS or TW_SIN.  The v field of the
 * terminating TW_NEXT instruction is stored in *vl.
 */
static INT twlen0(INT r, const tw_instr *p, INT *vl)
{
     INT count = 0;

     /* compute length of bytecode program */
     A(r > 0);
     while (p->op != TW_NEXT) {
          switch (p->op) {
              case TW_FULL:
                   count += (r - 1) * 2;
                   break;
              case TW_HALF:
                   count += (r - 1);
                   break;
              case TW_CEXP:
                   count += 2;
                   break;
              case TW_COS:
              case TW_SIN:
                   count += 1;
                   break;
          }
          ++p;
     }

     *vl = (INT)p->v;
     return count;
}
|
||||
|
||||
/* Public wrapper around twlen0 that discards the vector length
   reported by the program's terminator. */
INT X(twiddle_length)(INT r, const tw_instr *p)
{
     INT ignored_vl;
     const INT len = twlen0(r, p, &ignored_vl);

     return len;
}
|
||||
|
||||
static R *compute(enum wakefulness wakefulness,
|
||||
const tw_instr *instr, INT n, INT r, INT m)
|
||||
{
|
||||
INT ntwiddle, j, vl;
|
||||
R *W, *W0;
|
||||
const tw_instr *p;
|
||||
triggen *t = X(mktriggen)(wakefulness, n);
|
||||
|
||||
p = instr;
|
||||
ntwiddle = twlen0(r, p, &vl);
|
||||
|
||||
A(m % vl == 0);
|
||||
|
||||
W0 = W = (R *)MALLOC((ntwiddle * (m / vl)) * sizeof(R), TWIDDLES);
|
||||
|
||||
for (j = 0; j < m; j += vl) {
|
||||
for (p = instr; p->op != TW_NEXT; ++p) {
|
||||
switch (p->op) {
|
||||
case TW_FULL: {
|
||||
INT i;
|
||||
for (i = 1; i < r; ++i) {
|
||||
A((j + (INT)p->v) * i < n);
|
||||
A((j + (INT)p->v) * i > -n);
|
||||
t->cexp(t, (j + (INT)p->v) * i, W);
|
||||
W += 2;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case TW_HALF: {
|
||||
INT i;
|
||||
A((r % 2) == 1);
|
||||
for (i = 1; i + i < r; ++i) {
|
||||
t->cexp(t, MULMOD(i, (j + (INT)p->v), n), W);
|
||||
W += 2;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case TW_COS: {
|
||||
R d[2];
|
||||
|
||||
A((j + (INT)p->v) * p->i < n);
|
||||
A((j + (INT)p->v) * p->i > -n);
|
||||
t->cexp(t, (j + (INT)p->v) * (INT)p->i, d);
|
||||
*W++ = d[0];
|
||||
break;
|
||||
}
|
||||
|
||||
case TW_SIN: {
|
||||
R d[2];
|
||||
|
||||
A((j + (INT)p->v) * p->i < n);
|
||||
A((j + (INT)p->v) * p->i > -n);
|
||||
t->cexp(t, (j + (INT)p->v) * (INT)p->i, d);
|
||||
*W++ = d[1];
|
||||
break;
|
||||
}
|
||||
|
||||
case TW_CEXP:
|
||||
A((j + (INT)p->v) * p->i < n);
|
||||
A((j + (INT)p->v) * p->i > -n);
|
||||
t->cexp(t, (j + (INT)p->v) * (INT)p->i, W);
|
||||
W += 2;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
X(triggen_destroy)(t);
|
||||
return W0;
|
||||
}
|
||||
|
||||
static void mktwiddle(enum wakefulness wakefulness,
|
||||
twid **pp, const tw_instr *instr, INT n, INT r, INT m)
|
||||
{
|
||||
twid *p;
|
||||
INT h;
|
||||
|
||||
if ((p = lookup(wakefulness, instr, n, r, m))) {
|
||||
++p->refcnt;
|
||||
} else {
|
||||
p = (twid *) MALLOC(sizeof(twid), TWIDDLES);
|
||||
p->n = n;
|
||||
p->r = r;
|
||||
p->m = m;
|
||||
p->instr = instr;
|
||||
p->refcnt = 1;
|
||||
p->wakefulness = wakefulness;
|
||||
p->W = compute(wakefulness, instr, n, r, m);
|
||||
|
||||
/* cons! onto twlist */
|
||||
h = hash(n, r);
|
||||
p->cdr = twlist[h];
|
||||
twlist[h] = p;
|
||||
}
|
||||
|
||||
*pp = p;
|
||||
}
|
||||
|
||||
/*
 * Drop one reference to *pp.  When the count reaches zero the entry
 * is unlinked from its hash bucket, its array and the entry itself
 * are freed, and *pp is reset to null; otherwise *pp is left alone.
 */
static void twiddle_destroy(twid **pp)
{
     twid *p = *pp;
     twid **link;

     if (--p->refcnt != 0)
          return;

     /* remove p from twiddle list */
     link = &twlist[hash(p->n, p->r)];
     while (*link) {
          if (*link == p) {
               *link = p->cdr;
               X(ifree)(p->W);
               X(ifree)(p);
               *pp = 0;
               return;
          }
          link = &((*link)->cdr);
     }
     A(0 /* can't happen */ );
}
|
||||
|
||||
|
||||
void X(twiddle_awake)(enum wakefulness wakefulness, twid **pp,
|
||||
const tw_instr *instr, INT n, INT r, INT m)
|
||||
{
|
||||
switch (wakefulness) {
|
||||
case SLEEPY:
|
||||
twiddle_destroy(pp);
|
||||
break;
|
||||
default:
|
||||
mktwiddle(wakefulness, pp, instr, n, r, m);
|
||||
break;
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user