Updates
This commit is contained in:
12
fftw-3.3.10/reodft/Makefile.am
Normal file
12
fftw-3.3.10/reodft/Makefile.am
Normal file
@@ -0,0 +1,12 @@
|
||||
AM_CPPFLAGS = -I $(top_srcdir)
|
||||
SUBDIRS =
|
||||
|
||||
noinst_LTLIBRARIES = libreodft.la
|
||||
|
||||
# no longer used due to numerical problems
|
||||
EXTRA_DIST = reodft11e-r2hc.c redft00e-r2hc.c rodft00e-r2hc.c
|
||||
|
||||
libreodft_la_SOURCES = conf.c reodft.h reodft010e-r2hc.c \
|
||||
reodft11e-radix2.c reodft11e-r2hc-odd.c redft00e-r2hc-pad.c \
|
||||
rodft00e-r2hc-pad.c reodft00e-splitradix.c
|
||||
# redft00e-r2hc.c rodft00e-r2hc.c reodft11e-r2hc.c
|
||||
787
fftw-3.3.10/reodft/Makefile.in
Normal file
787
fftw-3.3.10/reodft/Makefile.in
Normal file
@@ -0,0 +1,787 @@
|
||||
# Makefile.in generated by automake 1.16.3 from Makefile.am.
|
||||
# @configure_input@
|
||||
|
||||
# Copyright (C) 1994-2020 Free Software Foundation, Inc.
|
||||
|
||||
# This Makefile.in is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
# with or without modifications, as long as this notice is preserved.
|
||||
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
|
||||
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
|
||||
# PARTICULAR PURPOSE.
|
||||
|
||||
@SET_MAKE@
|
||||
|
||||
VPATH = @srcdir@
|
||||
am__is_gnu_make = { \
|
||||
if test -z '$(MAKELEVEL)'; then \
|
||||
false; \
|
||||
elif test -n '$(MAKE_HOST)'; then \
|
||||
true; \
|
||||
elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
|
||||
true; \
|
||||
else \
|
||||
false; \
|
||||
fi; \
|
||||
}
|
||||
am__make_running_with_option = \
|
||||
case $${target_option-} in \
|
||||
?) ;; \
|
||||
*) echo "am__make_running_with_option: internal error: invalid" \
|
||||
"target option '$${target_option-}' specified" >&2; \
|
||||
exit 1;; \
|
||||
esac; \
|
||||
has_opt=no; \
|
||||
sane_makeflags=$$MAKEFLAGS; \
|
||||
if $(am__is_gnu_make); then \
|
||||
sane_makeflags=$$MFLAGS; \
|
||||
else \
|
||||
case $$MAKEFLAGS in \
|
||||
*\\[\ \ ]*) \
|
||||
bs=\\; \
|
||||
sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
|
||||
| sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \
|
||||
esac; \
|
||||
fi; \
|
||||
skip_next=no; \
|
||||
strip_trailopt () \
|
||||
{ \
|
||||
flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
|
||||
}; \
|
||||
for flg in $$sane_makeflags; do \
|
||||
test $$skip_next = yes && { skip_next=no; continue; }; \
|
||||
case $$flg in \
|
||||
*=*|--*) continue;; \
|
||||
-*I) strip_trailopt 'I'; skip_next=yes;; \
|
||||
-*I?*) strip_trailopt 'I';; \
|
||||
-*O) strip_trailopt 'O'; skip_next=yes;; \
|
||||
-*O?*) strip_trailopt 'O';; \
|
||||
-*l) strip_trailopt 'l'; skip_next=yes;; \
|
||||
-*l?*) strip_trailopt 'l';; \
|
||||
-[dEDm]) skip_next=yes;; \
|
||||
-[JT]) skip_next=yes;; \
|
||||
esac; \
|
||||
case $$flg in \
|
||||
*$$target_option*) has_opt=yes; break;; \
|
||||
esac; \
|
||||
done; \
|
||||
test $$has_opt = yes
|
||||
am__make_dryrun = (target_option=n; $(am__make_running_with_option))
|
||||
am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
|
||||
pkgdatadir = $(datadir)/@PACKAGE@
|
||||
pkgincludedir = $(includedir)/@PACKAGE@
|
||||
pkglibdir = $(libdir)/@PACKAGE@
|
||||
pkglibexecdir = $(libexecdir)/@PACKAGE@
|
||||
am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
|
||||
install_sh_DATA = $(install_sh) -c -m 644
|
||||
install_sh_PROGRAM = $(install_sh) -c
|
||||
install_sh_SCRIPT = $(install_sh) -c
|
||||
INSTALL_HEADER = $(INSTALL_DATA)
|
||||
transform = $(program_transform_name)
|
||||
NORMAL_INSTALL = :
|
||||
PRE_INSTALL = :
|
||||
POST_INSTALL = :
|
||||
NORMAL_UNINSTALL = :
|
||||
PRE_UNINSTALL = :
|
||||
POST_UNINSTALL = :
|
||||
build_triplet = @build@
|
||||
host_triplet = @host@
|
||||
subdir = reodft
|
||||
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
|
||||
am__aclocal_m4_deps = $(top_srcdir)/m4/acx_mpi.m4 \
|
||||
$(top_srcdir)/m4/acx_pthread.m4 \
|
||||
$(top_srcdir)/m4/ax_cc_maxopt.m4 \
|
||||
$(top_srcdir)/m4/ax_check_compiler_flags.m4 \
|
||||
$(top_srcdir)/m4/ax_compiler_vendor.m4 \
|
||||
$(top_srcdir)/m4/ax_gcc_aligns_stack.m4 \
|
||||
$(top_srcdir)/m4/ax_gcc_version.m4 \
|
||||
$(top_srcdir)/m4/ax_openmp.m4 $(top_srcdir)/m4/libtool.m4 \
|
||||
$(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \
|
||||
$(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
|
||||
$(top_srcdir)/configure.ac
|
||||
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
|
||||
$(ACLOCAL_M4)
|
||||
DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON)
|
||||
mkinstalldirs = $(install_sh) -d
|
||||
CONFIG_HEADER = $(top_builddir)/config.h
|
||||
CONFIG_CLEAN_FILES =
|
||||
CONFIG_CLEAN_VPATH_FILES =
|
||||
LTLIBRARIES = $(noinst_LTLIBRARIES)
|
||||
libreodft_la_LIBADD =
|
||||
am_libreodft_la_OBJECTS = conf.lo reodft010e-r2hc.lo \
|
||||
reodft11e-radix2.lo reodft11e-r2hc-odd.lo redft00e-r2hc-pad.lo \
|
||||
rodft00e-r2hc-pad.lo reodft00e-splitradix.lo
|
||||
libreodft_la_OBJECTS = $(am_libreodft_la_OBJECTS)
|
||||
AM_V_lt = $(am__v_lt_@AM_V@)
|
||||
am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
|
||||
am__v_lt_0 = --silent
|
||||
am__v_lt_1 =
|
||||
AM_V_P = $(am__v_P_@AM_V@)
|
||||
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
|
||||
am__v_P_0 = false
|
||||
am__v_P_1 = :
|
||||
AM_V_GEN = $(am__v_GEN_@AM_V@)
|
||||
am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
|
||||
am__v_GEN_0 = @echo " GEN " $@;
|
||||
am__v_GEN_1 =
|
||||
AM_V_at = $(am__v_at_@AM_V@)
|
||||
am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
|
||||
am__v_at_0 = @
|
||||
am__v_at_1 =
|
||||
DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
|
||||
depcomp = $(SHELL) $(top_srcdir)/depcomp
|
||||
am__maybe_remake_depfiles = depfiles
|
||||
am__depfiles_remade = ./$(DEPDIR)/conf.Plo \
|
||||
./$(DEPDIR)/redft00e-r2hc-pad.Plo \
|
||||
./$(DEPDIR)/reodft00e-splitradix.Plo \
|
||||
./$(DEPDIR)/reodft010e-r2hc.Plo \
|
||||
./$(DEPDIR)/reodft11e-r2hc-odd.Plo \
|
||||
./$(DEPDIR)/reodft11e-radix2.Plo \
|
||||
./$(DEPDIR)/rodft00e-r2hc-pad.Plo
|
||||
am__mv = mv -f
|
||||
COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
|
||||
$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
|
||||
LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
|
||||
$(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \
|
||||
$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
|
||||
$(AM_CFLAGS) $(CFLAGS)
|
||||
AM_V_CC = $(am__v_CC_@AM_V@)
|
||||
am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
|
||||
am__v_CC_0 = @echo " CC " $@;
|
||||
am__v_CC_1 =
|
||||
CCLD = $(CC)
|
||||
LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
|
||||
$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
|
||||
$(AM_LDFLAGS) $(LDFLAGS) -o $@
|
||||
AM_V_CCLD = $(am__v_CCLD_@AM_V@)
|
||||
am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
|
||||
am__v_CCLD_0 = @echo " CCLD " $@;
|
||||
am__v_CCLD_1 =
|
||||
SOURCES = $(libreodft_la_SOURCES)
|
||||
DIST_SOURCES = $(libreodft_la_SOURCES)
|
||||
RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \
|
||||
ctags-recursive dvi-recursive html-recursive info-recursive \
|
||||
install-data-recursive install-dvi-recursive \
|
||||
install-exec-recursive install-html-recursive \
|
||||
install-info-recursive install-pdf-recursive \
|
||||
install-ps-recursive install-recursive installcheck-recursive \
|
||||
installdirs-recursive pdf-recursive ps-recursive \
|
||||
tags-recursive uninstall-recursive
|
||||
am__can_run_installinfo = \
|
||||
case $$AM_UPDATE_INFO_DIR in \
|
||||
n|no|NO) false;; \
|
||||
*) (install-info --version) >/dev/null 2>&1;; \
|
||||
esac
|
||||
RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \
|
||||
distclean-recursive maintainer-clean-recursive
|
||||
am__recursive_targets = \
|
||||
$(RECURSIVE_TARGETS) \
|
||||
$(RECURSIVE_CLEAN_TARGETS) \
|
||||
$(am__extra_recursive_targets)
|
||||
AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \
|
||||
distdir distdir-am
|
||||
am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
|
||||
# Read a list of newline-separated strings from the standard input,
|
||||
# and print each of them once, without duplicates. Input order is
|
||||
# *not* preserved.
|
||||
am__uniquify_input = $(AWK) '\
|
||||
BEGIN { nonempty = 0; } \
|
||||
{ items[$$0] = 1; nonempty = 1; } \
|
||||
END { if (nonempty) { for (i in items) print i; }; } \
|
||||
'
|
||||
# Make sure the list of sources is unique. This is necessary because,
|
||||
# e.g., the same source file might be shared among _SOURCES variables
|
||||
# for different programs/libraries.
|
||||
am__define_uniq_tagged_files = \
|
||||
list='$(am__tagged_files)'; \
|
||||
unique=`for i in $$list; do \
|
||||
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
|
||||
done | $(am__uniquify_input)`
|
||||
ETAGS = etags
|
||||
CTAGS = ctags
|
||||
DIST_SUBDIRS = $(SUBDIRS)
|
||||
am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/depcomp
|
||||
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
|
||||
am__relativize = \
|
||||
dir0=`pwd`; \
|
||||
sed_first='s,^\([^/]*\)/.*$$,\1,'; \
|
||||
sed_rest='s,^[^/]*/*,,'; \
|
||||
sed_last='s,^.*/\([^/]*\)$$,\1,'; \
|
||||
sed_butlast='s,/*[^/]*$$,,'; \
|
||||
while test -n "$$dir1"; do \
|
||||
first=`echo "$$dir1" | sed -e "$$sed_first"`; \
|
||||
if test "$$first" != "."; then \
|
||||
if test "$$first" = ".."; then \
|
||||
dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \
|
||||
dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \
|
||||
else \
|
||||
first2=`echo "$$dir2" | sed -e "$$sed_first"`; \
|
||||
if test "$$first2" = "$$first"; then \
|
||||
dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \
|
||||
else \
|
||||
dir2="../$$dir2"; \
|
||||
fi; \
|
||||
dir0="$$dir0"/"$$first"; \
|
||||
fi; \
|
||||
fi; \
|
||||
dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \
|
||||
done; \
|
||||
reldir="$$dir2"
|
||||
ACLOCAL = @ACLOCAL@
|
||||
ALLOCA = @ALLOCA@
|
||||
ALTIVEC_CFLAGS = @ALTIVEC_CFLAGS@
|
||||
AMTAR = @AMTAR@
|
||||
AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
|
||||
AR = @AR@
|
||||
AS = @AS@
|
||||
AUTOCONF = @AUTOCONF@
|
||||
AUTOHEADER = @AUTOHEADER@
|
||||
AUTOMAKE = @AUTOMAKE@
|
||||
AVX2_CFLAGS = @AVX2_CFLAGS@
|
||||
AVX512_CFLAGS = @AVX512_CFLAGS@
|
||||
AVX_128_FMA_CFLAGS = @AVX_128_FMA_CFLAGS@
|
||||
AVX_CFLAGS = @AVX_CFLAGS@
|
||||
AWK = @AWK@
|
||||
CC = @CC@
|
||||
CCDEPMODE = @CCDEPMODE@
|
||||
CFLAGS = @CFLAGS@
|
||||
CHECK_PL_OPTS = @CHECK_PL_OPTS@
|
||||
CPP = @CPP@
|
||||
CPPFLAGS = @CPPFLAGS@
|
||||
CYGPATH_W = @CYGPATH_W@
|
||||
C_FFTW_R2R_KIND = @C_FFTW_R2R_KIND@
|
||||
C_MPI_FINT = @C_MPI_FINT@
|
||||
DEFS = @DEFS@
|
||||
DEPDIR = @DEPDIR@
|
||||
DLLTOOL = @DLLTOOL@
|
||||
DSYMUTIL = @DSYMUTIL@
|
||||
DUMPBIN = @DUMPBIN@
|
||||
ECHO_C = @ECHO_C@
|
||||
ECHO_N = @ECHO_N@
|
||||
ECHO_T = @ECHO_T@
|
||||
EGREP = @EGREP@
|
||||
EXEEXT = @EXEEXT@
|
||||
F77 = @F77@
|
||||
FFLAGS = @FFLAGS@
|
||||
FGREP = @FGREP@
|
||||
FLIBS = @FLIBS@
|
||||
GREP = @GREP@
|
||||
INDENT = @INDENT@
|
||||
INSTALL = @INSTALL@
|
||||
INSTALL_DATA = @INSTALL_DATA@
|
||||
INSTALL_PROGRAM = @INSTALL_PROGRAM@
|
||||
INSTALL_SCRIPT = @INSTALL_SCRIPT@
|
||||
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
|
||||
KCVI_CFLAGS = @KCVI_CFLAGS@
|
||||
LD = @LD@
|
||||
LDFLAGS = @LDFLAGS@
|
||||
LIBOBJS = @LIBOBJS@
|
||||
LIBQUADMATH = @LIBQUADMATH@
|
||||
LIBS = @LIBS@
|
||||
LIBTOOL = @LIBTOOL@
|
||||
LIPO = @LIPO@
|
||||
LN_S = @LN_S@
|
||||
LTLIBOBJS = @LTLIBOBJS@
|
||||
LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@
|
||||
MAINT = @MAINT@
|
||||
MAKEINFO = @MAKEINFO@
|
||||
MANIFEST_TOOL = @MANIFEST_TOOL@
|
||||
MKDIR_P = @MKDIR_P@
|
||||
MPICC = @MPICC@
|
||||
MPILIBS = @MPILIBS@
|
||||
MPIRUN = @MPIRUN@
|
||||
NEON_CFLAGS = @NEON_CFLAGS@
|
||||
NM = @NM@
|
||||
NMEDIT = @NMEDIT@
|
||||
OBJDUMP = @OBJDUMP@
|
||||
OBJEXT = @OBJEXT@
|
||||
OCAMLBUILD = @OCAMLBUILD@
|
||||
OPENMP_CFLAGS = @OPENMP_CFLAGS@
|
||||
OTOOL = @OTOOL@
|
||||
OTOOL64 = @OTOOL64@
|
||||
PACKAGE = @PACKAGE@
|
||||
PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
|
||||
PACKAGE_NAME = @PACKAGE_NAME@
|
||||
PACKAGE_STRING = @PACKAGE_STRING@
|
||||
PACKAGE_TARNAME = @PACKAGE_TARNAME@
|
||||
PACKAGE_URL = @PACKAGE_URL@
|
||||
PACKAGE_VERSION = @PACKAGE_VERSION@
|
||||
PATH_SEPARATOR = @PATH_SEPARATOR@
|
||||
POW_LIB = @POW_LIB@
|
||||
PRECISION = @PRECISION@
|
||||
PREC_SUFFIX = @PREC_SUFFIX@
|
||||
PTHREAD_CC = @PTHREAD_CC@
|
||||
PTHREAD_CFLAGS = @PTHREAD_CFLAGS@
|
||||
PTHREAD_LIBS = @PTHREAD_LIBS@
|
||||
RANLIB = @RANLIB@
|
||||
SED = @SED@
|
||||
SET_MAKE = @SET_MAKE@
|
||||
SHARED_VERSION_INFO = @SHARED_VERSION_INFO@
|
||||
SHELL = @SHELL@
|
||||
SSE2_CFLAGS = @SSE2_CFLAGS@
|
||||
STACK_ALIGN_CFLAGS = @STACK_ALIGN_CFLAGS@
|
||||
STRIP = @STRIP@
|
||||
THREADLIBS = @THREADLIBS@
|
||||
VERSION = @VERSION@
|
||||
VSX_CFLAGS = @VSX_CFLAGS@
|
||||
abs_builddir = @abs_builddir@
|
||||
abs_srcdir = @abs_srcdir@
|
||||
abs_top_builddir = @abs_top_builddir@
|
||||
abs_top_srcdir = @abs_top_srcdir@
|
||||
ac_ct_AR = @ac_ct_AR@
|
||||
ac_ct_CC = @ac_ct_CC@
|
||||
ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
|
||||
ac_ct_F77 = @ac_ct_F77@
|
||||
acx_pthread_config = @acx_pthread_config@
|
||||
am__include = @am__include@
|
||||
am__leading_dot = @am__leading_dot@
|
||||
am__quote = @am__quote@
|
||||
am__tar = @am__tar@
|
||||
am__untar = @am__untar@
|
||||
bindir = @bindir@
|
||||
build = @build@
|
||||
build_alias = @build_alias@
|
||||
build_cpu = @build_cpu@
|
||||
build_os = @build_os@
|
||||
build_vendor = @build_vendor@
|
||||
builddir = @builddir@
|
||||
datadir = @datadir@
|
||||
datarootdir = @datarootdir@
|
||||
docdir = @docdir@
|
||||
dvidir = @dvidir@
|
||||
exec_prefix = @exec_prefix@
|
||||
host = @host@
|
||||
host_alias = @host_alias@
|
||||
host_cpu = @host_cpu@
|
||||
host_os = @host_os@
|
||||
host_vendor = @host_vendor@
|
||||
htmldir = @htmldir@
|
||||
includedir = @includedir@
|
||||
infodir = @infodir@
|
||||
install_sh = @install_sh@
|
||||
libdir = @libdir@
|
||||
libexecdir = @libexecdir@
|
||||
localedir = @localedir@
|
||||
localstatedir = @localstatedir@
|
||||
mandir = @mandir@
|
||||
mkdir_p = @mkdir_p@
|
||||
oldincludedir = @oldincludedir@
|
||||
pdfdir = @pdfdir@
|
||||
prefix = @prefix@
|
||||
program_transform_name = @program_transform_name@
|
||||
psdir = @psdir@
|
||||
runstatedir = @runstatedir@
|
||||
sbindir = @sbindir@
|
||||
sharedstatedir = @sharedstatedir@
|
||||
srcdir = @srcdir@
|
||||
sysconfdir = @sysconfdir@
|
||||
target_alias = @target_alias@
|
||||
top_build_prefix = @top_build_prefix@
|
||||
top_builddir = @top_builddir@
|
||||
top_srcdir = @top_srcdir@
|
||||
AM_CPPFLAGS = -I $(top_srcdir)
|
||||
SUBDIRS =
|
||||
noinst_LTLIBRARIES = libreodft.la
|
||||
|
||||
# no longer used due to numerical problems
|
||||
EXTRA_DIST = reodft11e-r2hc.c redft00e-r2hc.c rodft00e-r2hc.c
|
||||
libreodft_la_SOURCES = conf.c reodft.h reodft010e-r2hc.c \
|
||||
reodft11e-radix2.c reodft11e-r2hc-odd.c redft00e-r2hc-pad.c \
|
||||
rodft00e-r2hc-pad.c reodft00e-splitradix.c
|
||||
|
||||
all: all-recursive
|
||||
|
||||
.SUFFIXES:
|
||||
.SUFFIXES: .c .lo .o .obj
|
||||
$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
|
||||
@for dep in $?; do \
|
||||
case '$(am__configure_deps)' in \
|
||||
*$$dep*) \
|
||||
( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
|
||||
&& { if test -f $@; then exit 0; else break; fi; }; \
|
||||
exit 1;; \
|
||||
esac; \
|
||||
done; \
|
||||
echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu reodft/Makefile'; \
|
||||
$(am__cd) $(top_srcdir) && \
|
||||
$(AUTOMAKE) --gnu reodft/Makefile
|
||||
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
|
||||
@case '$?' in \
|
||||
*config.status*) \
|
||||
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
|
||||
*) \
|
||||
echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \
|
||||
cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \
|
||||
esac;
|
||||
|
||||
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
|
||||
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
|
||||
|
||||
$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
|
||||
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
|
||||
$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
|
||||
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
|
||||
$(am__aclocal_m4_deps):
|
||||
|
||||
clean-noinstLTLIBRARIES:
|
||||
-test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES)
|
||||
@list='$(noinst_LTLIBRARIES)'; \
|
||||
locs=`for p in $$list; do echo $$p; done | \
|
||||
sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \
|
||||
sort -u`; \
|
||||
test -z "$$locs" || { \
|
||||
echo rm -f $${locs}; \
|
||||
rm -f $${locs}; \
|
||||
}
|
||||
|
||||
libreodft.la: $(libreodft_la_OBJECTS) $(libreodft_la_DEPENDENCIES) $(EXTRA_libreodft_la_DEPENDENCIES)
|
||||
$(AM_V_CCLD)$(LINK) $(libreodft_la_OBJECTS) $(libreodft_la_LIBADD) $(LIBS)
|
||||
|
||||
mostlyclean-compile:
|
||||
-rm -f *.$(OBJEXT)
|
||||
|
||||
distclean-compile:
|
||||
-rm -f *.tab.c
|
||||
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/conf.Plo@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/redft00e-r2hc-pad.Plo@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/reodft00e-splitradix.Plo@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/reodft010e-r2hc.Plo@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/reodft11e-r2hc-odd.Plo@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/reodft11e-radix2.Plo@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rodft00e-r2hc-pad.Plo@am__quote@ # am--include-marker
|
||||
|
||||
$(am__depfiles_remade):
|
||||
@$(MKDIR_P) $(@D)
|
||||
@echo '# dummy' >$@-t && $(am__mv) $@-t $@
|
||||
|
||||
am--depfiles: $(am__depfiles_remade)
|
||||
|
||||
.c.o:
|
||||
@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
|
||||
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
|
||||
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
|
||||
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
|
||||
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $<
|
||||
|
||||
.c.obj:
|
||||
@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
|
||||
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
|
||||
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
|
||||
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
|
||||
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
|
||||
|
||||
.c.lo:
|
||||
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
|
||||
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
|
||||
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
|
||||
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
|
||||
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $<
|
||||
|
||||
mostlyclean-libtool:
|
||||
-rm -f *.lo
|
||||
|
||||
clean-libtool:
|
||||
-rm -rf .libs _libs
|
||||
|
||||
# This directory's subdirectories are mostly independent; you can cd
|
||||
# into them and run 'make' without going through this Makefile.
|
||||
# To change the values of 'make' variables: instead of editing Makefiles,
|
||||
# (1) if the variable is set in 'config.status', edit 'config.status'
|
||||
# (which will cause the Makefiles to be regenerated when you run 'make');
|
||||
# (2) otherwise, pass the desired values on the 'make' command line.
|
||||
$(am__recursive_targets):
|
||||
@fail=; \
|
||||
if $(am__make_keepgoing); then \
|
||||
failcom='fail=yes'; \
|
||||
else \
|
||||
failcom='exit 1'; \
|
||||
fi; \
|
||||
dot_seen=no; \
|
||||
target=`echo $@ | sed s/-recursive//`; \
|
||||
case "$@" in \
|
||||
distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \
|
||||
*) list='$(SUBDIRS)' ;; \
|
||||
esac; \
|
||||
for subdir in $$list; do \
|
||||
echo "Making $$target in $$subdir"; \
|
||||
if test "$$subdir" = "."; then \
|
||||
dot_seen=yes; \
|
||||
local_target="$$target-am"; \
|
||||
else \
|
||||
local_target="$$target"; \
|
||||
fi; \
|
||||
($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
|
||||
|| eval $$failcom; \
|
||||
done; \
|
||||
if test "$$dot_seen" = "no"; then \
|
||||
$(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \
|
||||
fi; test -z "$$fail"
|
||||
|
||||
ID: $(am__tagged_files)
|
||||
$(am__define_uniq_tagged_files); mkid -fID $$unique
|
||||
tags: tags-recursive
|
||||
TAGS: tags
|
||||
|
||||
tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
|
||||
set x; \
|
||||
here=`pwd`; \
|
||||
if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \
|
||||
include_option=--etags-include; \
|
||||
empty_fix=.; \
|
||||
else \
|
||||
include_option=--include; \
|
||||
empty_fix=; \
|
||||
fi; \
|
||||
list='$(SUBDIRS)'; for subdir in $$list; do \
|
||||
if test "$$subdir" = .; then :; else \
|
||||
test ! -f $$subdir/TAGS || \
|
||||
set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \
|
||||
fi; \
|
||||
done; \
|
||||
$(am__define_uniq_tagged_files); \
|
||||
shift; \
|
||||
if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
|
||||
test -n "$$unique" || unique=$$empty_fix; \
|
||||
if test $$# -gt 0; then \
|
||||
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
|
||||
"$$@" $$unique; \
|
||||
else \
|
||||
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
|
||||
$$unique; \
|
||||
fi; \
|
||||
fi
|
||||
ctags: ctags-recursive
|
||||
|
||||
CTAGS: ctags
|
||||
ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
|
||||
$(am__define_uniq_tagged_files); \
|
||||
test -z "$(CTAGS_ARGS)$$unique" \
|
||||
|| $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
|
||||
$$unique
|
||||
|
||||
GTAGS:
|
||||
here=`$(am__cd) $(top_builddir) && pwd` \
|
||||
&& $(am__cd) $(top_srcdir) \
|
||||
&& gtags -i $(GTAGS_ARGS) "$$here"
|
||||
cscopelist: cscopelist-recursive
|
||||
|
||||
cscopelist-am: $(am__tagged_files)
|
||||
list='$(am__tagged_files)'; \
|
||||
case "$(srcdir)" in \
|
||||
[\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
|
||||
*) sdir=$(subdir)/$(srcdir) ;; \
|
||||
esac; \
|
||||
for i in $$list; do \
|
||||
if test -f "$$i"; then \
|
||||
echo "$(subdir)/$$i"; \
|
||||
else \
|
||||
echo "$$sdir/$$i"; \
|
||||
fi; \
|
||||
done >> $(top_builddir)/cscope.files
|
||||
|
||||
distclean-tags:
|
||||
-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
|
||||
|
||||
distdir: $(BUILT_SOURCES)
|
||||
$(MAKE) $(AM_MAKEFLAGS) distdir-am
|
||||
|
||||
distdir-am: $(DISTFILES)
|
||||
@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
|
||||
topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
|
||||
list='$(DISTFILES)'; \
|
||||
dist_files=`for file in $$list; do echo $$file; done | \
|
||||
sed -e "s|^$$srcdirstrip/||;t" \
|
||||
-e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
|
||||
case $$dist_files in \
|
||||
*/*) $(MKDIR_P) `echo "$$dist_files" | \
|
||||
sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
|
||||
sort -u` ;; \
|
||||
esac; \
|
||||
for file in $$dist_files; do \
|
||||
if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
|
||||
if test -d $$d/$$file; then \
|
||||
dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
|
||||
if test -d "$(distdir)/$$file"; then \
|
||||
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
|
||||
fi; \
|
||||
if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
|
||||
cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
|
||||
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
|
||||
fi; \
|
||||
cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
|
||||
else \
|
||||
test -f "$(distdir)/$$file" \
|
||||
|| cp -p $$d/$$file "$(distdir)/$$file" \
|
||||
|| exit 1; \
|
||||
fi; \
|
||||
done
|
||||
@list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
|
||||
if test "$$subdir" = .; then :; else \
|
||||
$(am__make_dryrun) \
|
||||
|| test -d "$(distdir)/$$subdir" \
|
||||
|| $(MKDIR_P) "$(distdir)/$$subdir" \
|
||||
|| exit 1; \
|
||||
dir1=$$subdir; dir2="$(distdir)/$$subdir"; \
|
||||
$(am__relativize); \
|
||||
new_distdir=$$reldir; \
|
||||
dir1=$$subdir; dir2="$(top_distdir)"; \
|
||||
$(am__relativize); \
|
||||
new_top_distdir=$$reldir; \
|
||||
echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \
|
||||
echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \
|
||||
($(am__cd) $$subdir && \
|
||||
$(MAKE) $(AM_MAKEFLAGS) \
|
||||
top_distdir="$$new_top_distdir" \
|
||||
distdir="$$new_distdir" \
|
||||
am__remove_distdir=: \
|
||||
am__skip_length_check=: \
|
||||
am__skip_mode_fix=: \
|
||||
distdir) \
|
||||
|| exit 1; \
|
||||
fi; \
|
||||
done
|
||||
check-am: all-am
|
||||
check: check-recursive
|
||||
all-am: Makefile $(LTLIBRARIES)
|
||||
installdirs: installdirs-recursive
|
||||
installdirs-am:
|
||||
install: install-recursive
|
||||
install-exec: install-exec-recursive
|
||||
install-data: install-data-recursive
|
||||
uninstall: uninstall-recursive
|
||||
|
||||
install-am: all-am
|
||||
@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
|
||||
|
||||
installcheck: installcheck-recursive
|
||||
install-strip:
|
||||
if test -z '$(STRIP)'; then \
|
||||
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
|
||||
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
|
||||
install; \
|
||||
else \
|
||||
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
|
||||
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
|
||||
"INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
|
||||
fi
|
||||
mostlyclean-generic:
|
||||
|
||||
clean-generic:
|
||||
|
||||
distclean-generic:
|
||||
-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
|
||||
-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
|
||||
|
||||
maintainer-clean-generic:
|
||||
@echo "This command is intended for maintainers to use"
|
||||
@echo "it deletes files that may require special tools to rebuild."
|
||||
clean: clean-recursive
|
||||
|
||||
clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \
|
||||
mostlyclean-am
|
||||
|
||||
distclean: distclean-recursive
|
||||
-rm -f ./$(DEPDIR)/conf.Plo
|
||||
-rm -f ./$(DEPDIR)/redft00e-r2hc-pad.Plo
|
||||
-rm -f ./$(DEPDIR)/reodft00e-splitradix.Plo
|
||||
-rm -f ./$(DEPDIR)/reodft010e-r2hc.Plo
|
||||
-rm -f ./$(DEPDIR)/reodft11e-r2hc-odd.Plo
|
||||
-rm -f ./$(DEPDIR)/reodft11e-radix2.Plo
|
||||
-rm -f ./$(DEPDIR)/rodft00e-r2hc-pad.Plo
|
||||
-rm -f Makefile
|
||||
distclean-am: clean-am distclean-compile distclean-generic \
|
||||
distclean-tags
|
||||
|
||||
dvi: dvi-recursive
|
||||
|
||||
dvi-am:
|
||||
|
||||
html: html-recursive
|
||||
|
||||
html-am:
|
||||
|
||||
info: info-recursive
|
||||
|
||||
info-am:
|
||||
|
||||
install-data-am:
|
||||
|
||||
install-dvi: install-dvi-recursive
|
||||
|
||||
install-dvi-am:
|
||||
|
||||
install-exec-am:
|
||||
|
||||
install-html: install-html-recursive
|
||||
|
||||
install-html-am:
|
||||
|
||||
install-info: install-info-recursive
|
||||
|
||||
install-info-am:
|
||||
|
||||
install-man:
|
||||
|
||||
install-pdf: install-pdf-recursive
|
||||
|
||||
install-pdf-am:
|
||||
|
||||
install-ps: install-ps-recursive
|
||||
|
||||
install-ps-am:
|
||||
|
||||
installcheck-am:
|
||||
|
||||
maintainer-clean: maintainer-clean-recursive
|
||||
-rm -f ./$(DEPDIR)/conf.Plo
|
||||
-rm -f ./$(DEPDIR)/redft00e-r2hc-pad.Plo
|
||||
-rm -f ./$(DEPDIR)/reodft00e-splitradix.Plo
|
||||
-rm -f ./$(DEPDIR)/reodft010e-r2hc.Plo
|
||||
-rm -f ./$(DEPDIR)/reodft11e-r2hc-odd.Plo
|
||||
-rm -f ./$(DEPDIR)/reodft11e-radix2.Plo
|
||||
-rm -f ./$(DEPDIR)/rodft00e-r2hc-pad.Plo
|
||||
-rm -f Makefile
|
||||
maintainer-clean-am: distclean-am maintainer-clean-generic
|
||||
|
||||
mostlyclean: mostlyclean-recursive
|
||||
|
||||
mostlyclean-am: mostlyclean-compile mostlyclean-generic \
|
||||
mostlyclean-libtool
|
||||
|
||||
pdf: pdf-recursive
|
||||
|
||||
pdf-am:
|
||||
|
||||
ps: ps-recursive
|
||||
|
||||
ps-am:
|
||||
|
||||
uninstall-am:
|
||||
|
||||
.MAKE: $(am__recursive_targets) install-am install-strip
|
||||
|
||||
.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am \
|
||||
am--depfiles check check-am clean clean-generic clean-libtool \
|
||||
clean-noinstLTLIBRARIES cscopelist-am ctags ctags-am distclean \
|
||||
distclean-compile distclean-generic distclean-libtool \
|
||||
distclean-tags distdir dvi dvi-am html html-am info info-am \
|
||||
install install-am install-data install-data-am install-dvi \
|
||||
install-dvi-am install-exec install-exec-am install-html \
|
||||
install-html-am install-info install-info-am install-man \
|
||||
install-pdf install-pdf-am install-ps install-ps-am \
|
||||
install-strip installcheck installcheck-am installdirs \
|
||||
installdirs-am maintainer-clean maintainer-clean-generic \
|
||||
mostlyclean mostlyclean-compile mostlyclean-generic \
|
||||
mostlyclean-libtool pdf pdf-am ps ps-am tags tags-am uninstall \
|
||||
uninstall-am
|
||||
|
||||
.PRECIOUS: Makefile
|
||||
|
||||
# redft00e-r2hc.c rodft00e-r2hc.c reodft11e-r2hc.c
|
||||
|
||||
# Tell versions [3.59,3.63) of GNU make to not export all variables.
|
||||
# Otherwise a system limit (for SysV at least) may be exceeded.
|
||||
.NOEXPORT:
|
||||
45
fftw-3.3.10/reodft/conf.c
Normal file
45
fftw-3.3.10/reodft/conf.c
Normal file
@@ -0,0 +1,45 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include "reodft/reodft.h"
|
||||
|
||||
static const solvtab s =
|
||||
{
|
||||
#if 0 /* 1 to enable "standard" algorithms with substandard accuracy;
|
||||
you must also add them to Makefile.am to compile these files*/
|
||||
SOLVTAB(X(redft00e_r2hc_register)),
|
||||
SOLVTAB(X(rodft00e_r2hc_register)),
|
||||
SOLVTAB(X(reodft11e_r2hc_register)),
|
||||
#endif
|
||||
SOLVTAB(X(redft00e_r2hc_pad_register)),
|
||||
SOLVTAB(X(rodft00e_r2hc_pad_register)),
|
||||
SOLVTAB(X(reodft00e_splitradix_register)),
|
||||
SOLVTAB(X(reodft010e_r2hc_register)),
|
||||
SOLVTAB(X(reodft11e_radix2_r2hc_register)),
|
||||
SOLVTAB(X(reodft11e_r2hc_odd_register)),
|
||||
|
||||
SOLVTAB_END
|
||||
};
|
||||
|
||||
void X(reodft_conf_standard)(planner *p)
|
||||
{
|
||||
X(solvtab_exec)(s, p);
|
||||
}
|
||||
197
fftw-3.3.10/reodft/redft00e-r2hc-pad.c
Normal file
197
fftw-3.3.10/reodft/redft00e-r2hc-pad.c
Normal file
@@ -0,0 +1,197 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
/* Do a REDFT00 problem via an R2HC problem, padded symmetrically to
|
||||
twice the size. This is asymptotically a factor of ~2 worse than
|
||||
redft00e-r2hc.c (the algorithm used in e.g. FFTPACK and Numerical
|
||||
Recipes), but we abandoned the latter after we discovered that it
|
||||
has intrinsic accuracy problems. */
|
||||
|
||||
#include "reodft/reodft.h"
|
||||
|
||||
typedef struct {
|
||||
solver super;
|
||||
} S;
|
||||
|
||||
typedef struct {
|
||||
plan_rdft super;
|
||||
plan *cld, *cldcpy;
|
||||
INT is;
|
||||
INT n;
|
||||
INT vl;
|
||||
INT ivs, ovs;
|
||||
} P;
|
||||
|
||||
static void apply(const plan *ego_, R *I, R *O)
|
||||
{
|
||||
const P *ego = (const P *) ego_;
|
||||
INT is = ego->is;
|
||||
INT i, n = ego->n;
|
||||
INT iv, vl = ego->vl;
|
||||
INT ivs = ego->ivs, ovs = ego->ovs;
|
||||
R *buf;
|
||||
|
||||
buf = (R *) MALLOC(sizeof(R) * (2*n), BUFFERS);
|
||||
|
||||
for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) {
|
||||
buf[0] = I[0];
|
||||
for (i = 1; i < n; ++i) {
|
||||
R a = I[i * is];
|
||||
buf[i] = a;
|
||||
buf[2*n - i] = a;
|
||||
}
|
||||
buf[i] = I[i * is]; /* i == n, Nyquist */
|
||||
|
||||
/* r2hc transform of size 2*n */
|
||||
{
|
||||
plan_rdft *cld = (plan_rdft *) ego->cld;
|
||||
cld->apply((plan *) cld, buf, buf);
|
||||
}
|
||||
|
||||
/* copy n+1 real numbers (real parts of hc array) from buf to O */
|
||||
{
|
||||
plan_rdft *cldcpy = (plan_rdft *) ego->cldcpy;
|
||||
cldcpy->apply((plan *) cldcpy, buf, O);
|
||||
}
|
||||
}
|
||||
|
||||
X(ifree)(buf);
|
||||
}
|
||||
|
||||
static void awake(plan *ego_, enum wakefulness wakefulness)
|
||||
{
|
||||
P *ego = (P *) ego_;
|
||||
X(plan_awake)(ego->cld, wakefulness);
|
||||
X(plan_awake)(ego->cldcpy, wakefulness);
|
||||
}
|
||||
|
||||
static void destroy(plan *ego_)
|
||||
{
|
||||
P *ego = (P *) ego_;
|
||||
X(plan_destroy_internal)(ego->cldcpy);
|
||||
X(plan_destroy_internal)(ego->cld);
|
||||
}
|
||||
|
||||
static void print(const plan *ego_, printer *p)
|
||||
{
|
||||
const P *ego = (const P *) ego_;
|
||||
p->print(p, "(redft00e-r2hc-pad-%D%v%(%p%)%(%p%))",
|
||||
ego->n + 1, ego->vl, ego->cld, ego->cldcpy);
|
||||
}
|
||||
|
||||
static int applicable0(const solver *ego_, const problem *p_)
|
||||
{
|
||||
const problem_rdft *p = (const problem_rdft *) p_;
|
||||
UNUSED(ego_);
|
||||
|
||||
return (1
|
||||
&& p->sz->rnk == 1
|
||||
&& p->vecsz->rnk <= 1
|
||||
&& p->kind[0] == REDFT00
|
||||
&& p->sz->dims[0].n > 1 /* n == 1 is not well-defined */
|
||||
);
|
||||
}
|
||||
|
||||
static int applicable(const solver *ego, const problem *p, const planner *plnr)
|
||||
{
|
||||
return (!NO_SLOWP(plnr) && applicable0(ego, p));
|
||||
}
|
||||
|
||||
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
|
||||
{
|
||||
P *pln;
|
||||
const problem_rdft *p;
|
||||
plan *cld = (plan *) 0, *cldcpy;
|
||||
R *buf = (R *) 0;
|
||||
INT n;
|
||||
INT vl, ivs, ovs;
|
||||
opcnt ops;
|
||||
|
||||
static const plan_adt padt = {
|
||||
X(rdft_solve), awake, print, destroy
|
||||
};
|
||||
|
||||
if (!applicable(ego_, p_, plnr))
|
||||
goto nada;
|
||||
|
||||
p = (const problem_rdft *) p_;
|
||||
|
||||
n = p->sz->dims[0].n - 1;
|
||||
A(n > 0);
|
||||
buf = (R *) MALLOC(sizeof(R) * (2*n), BUFFERS);
|
||||
|
||||
cld = X(mkplan_d)(plnr,X(mkproblem_rdft_1_d)(X(mktensor_1d)(2*n,1,1),
|
||||
X(mktensor_0d)(),
|
||||
buf, buf, R2HC));
|
||||
if (!cld)
|
||||
goto nada;
|
||||
|
||||
X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs);
|
||||
cldcpy =
|
||||
X(mkplan_d)(plnr,
|
||||
X(mkproblem_rdft_1_d)(X(mktensor_0d)(),
|
||||
X(mktensor_1d)(n+1,1,
|
||||
p->sz->dims[0].os),
|
||||
buf, TAINT(p->O, ovs), R2HC));
|
||||
if (!cldcpy)
|
||||
goto nada;
|
||||
|
||||
X(ifree)(buf);
|
||||
|
||||
pln = MKPLAN_RDFT(P, &padt, apply);
|
||||
|
||||
pln->n = n;
|
||||
pln->is = p->sz->dims[0].is;
|
||||
pln->cld = cld;
|
||||
pln->cldcpy = cldcpy;
|
||||
pln->vl = vl;
|
||||
pln->ivs = ivs;
|
||||
pln->ovs = ovs;
|
||||
|
||||
X(ops_zero)(&ops);
|
||||
ops.other = n + 2*n; /* loads + stores (input -> buf) */
|
||||
|
||||
X(ops_zero)(&pln->super.super.ops);
|
||||
X(ops_madd2)(pln->vl, &ops, &pln->super.super.ops);
|
||||
X(ops_madd2)(pln->vl, &cld->ops, &pln->super.super.ops);
|
||||
X(ops_madd2)(pln->vl, &cldcpy->ops, &pln->super.super.ops);
|
||||
|
||||
return &(pln->super.super);
|
||||
|
||||
nada:
|
||||
X(ifree0)(buf);
|
||||
if (cld)
|
||||
X(plan_destroy_internal)(cld);
|
||||
return (plan *)0;
|
||||
}
|
||||
|
||||
/* constructor */
|
||||
static solver *mksolver(void)
|
||||
{
|
||||
static const solver_adt sadt = { PROBLEM_RDFT, mkplan, 0 };
|
||||
S *slv = MKSOLVER(S, &sadt);
|
||||
return &(slv->super);
|
||||
}
|
||||
|
||||
void X(redft00e_r2hc_pad_register)(planner *p)
|
||||
{
|
||||
REGISTER_SOLVER(p, mksolver());
|
||||
}
|
||||
215
fftw-3.3.10/reodft/redft00e-r2hc.c
Normal file
215
fftw-3.3.10/reodft/redft00e-r2hc.c
Normal file
@@ -0,0 +1,215 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
/* Do a REDFT00 problem via an R2HC problem, with some pre/post-processing.
|
||||
|
||||
This code uses the trick from FFTPACK, also documented in a similar
|
||||
form by Numerical Recipes. Unfortunately, this algorithm seems to
|
||||
have intrinsic numerical problems (similar to those in
|
||||
reodft11e-r2hc.c), possibly due to the fact that it multiplies its
|
||||
input by a cosine, causing a loss of precision near the zero. For
|
||||
transforms of 16k points, it has already lost three or four decimal
|
||||
places of accuracy, which we deem unacceptable.
|
||||
|
||||
So, we have abandoned this algorithm in favor of the one in
|
||||
redft00-r2hc-pad.c, which unfortunately sacrifices 30-50% in speed.
|
||||
The only other alternative in the literature that does not have
|
||||
similar numerical difficulties seems to be the direct adaptation of
|
||||
the Cooley-Tukey decomposition for symmetric data, but this would
|
||||
require a whole new set of codelets and it's not clear that it's
|
||||
worth it at this point. However, we did implement the latter
|
||||
algorithm for the specific case of odd n (logically adapting the
|
||||
split-radix algorithm); see reodft00e-splitradix.c. */
|
||||
|
||||
#include "reodft/reodft.h"
|
||||
|
||||
typedef struct {
|
||||
solver super;
|
||||
} S;
|
||||
|
||||
typedef struct {
|
||||
plan_rdft super;
|
||||
plan *cld;
|
||||
twid *td;
|
||||
INT is, os;
|
||||
INT n;
|
||||
INT vl;
|
||||
INT ivs, ovs;
|
||||
} P;
|
||||
|
||||
static void apply(const plan *ego_, R *I, R *O)
|
||||
{
|
||||
const P *ego = (const P *) ego_;
|
||||
INT is = ego->is, os = ego->os;
|
||||
INT i, n = ego->n;
|
||||
INT iv, vl = ego->vl;
|
||||
INT ivs = ego->ivs, ovs = ego->ovs;
|
||||
R *W = ego->td->W;
|
||||
R *buf;
|
||||
E csum;
|
||||
|
||||
buf = (R *) MALLOC(sizeof(R) * n, BUFFERS);
|
||||
|
||||
for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) {
|
||||
buf[0] = I[0] + I[is * n];
|
||||
csum = I[0] - I[is * n];
|
||||
for (i = 1; i < n - i; ++i) {
|
||||
E a, b, apb, amb;
|
||||
a = I[is * i];
|
||||
b = I[is * (n - i)];
|
||||
csum += W[2*i] * (amb = K(2.0)*(a - b));
|
||||
amb = W[2*i+1] * amb;
|
||||
apb = (a + b);
|
||||
buf[i] = apb - amb;
|
||||
buf[n - i] = apb + amb;
|
||||
}
|
||||
if (i == n - i) {
|
||||
buf[i] = K(2.0) * I[is * i];
|
||||
}
|
||||
|
||||
{
|
||||
plan_rdft *cld = (plan_rdft *) ego->cld;
|
||||
cld->apply((plan *) cld, buf, buf);
|
||||
}
|
||||
|
||||
/* FIXME: use recursive/cascade summation for better stability? */
|
||||
O[0] = buf[0];
|
||||
O[os] = csum;
|
||||
for (i = 1; i + i < n; ++i) {
|
||||
INT k = i + i;
|
||||
O[os * k] = buf[i];
|
||||
O[os * (k + 1)] = O[os * (k - 1)] - buf[n - i];
|
||||
}
|
||||
if (i + i == n) {
|
||||
O[os * n] = buf[i];
|
||||
}
|
||||
}
|
||||
|
||||
X(ifree)(buf);
|
||||
}
|
||||
|
||||
static void awake(plan *ego_, enum wakefulness wakefulness)
|
||||
{
|
||||
P *ego = (P *) ego_;
|
||||
static const tw_instr redft00e_tw[] = {
|
||||
{ TW_COS, 0, 1 },
|
||||
{ TW_SIN, 0, 1 },
|
||||
{ TW_NEXT, 1, 0 }
|
||||
};
|
||||
|
||||
X(plan_awake)(ego->cld, wakefulness);
|
||||
X(twiddle_awake)(wakefulness,
|
||||
&ego->td, redft00e_tw, 2*ego->n, 1, (ego->n+1)/2);
|
||||
}
|
||||
|
||||
static void destroy(plan *ego_)
|
||||
{
|
||||
P *ego = (P *) ego_;
|
||||
X(plan_destroy_internal)(ego->cld);
|
||||
}
|
||||
|
||||
static void print(const plan *ego_, printer *p)
|
||||
{
|
||||
const P *ego = (const P *) ego_;
|
||||
p->print(p, "(redft00e-r2hc-%D%v%(%p%))", ego->n + 1, ego->vl, ego->cld);
|
||||
}
|
||||
|
||||
static int applicable0(const solver *ego_, const problem *p_)
|
||||
{
|
||||
const problem_rdft *p = (const problem_rdft *) p_;
|
||||
UNUSED(ego_);
|
||||
|
||||
return (1
|
||||
&& p->sz->rnk == 1
|
||||
&& p->vecsz->rnk <= 1
|
||||
&& p->kind[0] == REDFT00
|
||||
&& p->sz->dims[0].n > 1 /* n == 1 is not well-defined */
|
||||
);
|
||||
}
|
||||
|
||||
static int applicable(const solver *ego, const problem *p, const planner *plnr)
|
||||
{
|
||||
return (!NO_SLOWP(plnr) && applicable0(ego, p));
|
||||
}
|
||||
|
||||
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
|
||||
{
|
||||
P *pln;
|
||||
const problem_rdft *p;
|
||||
plan *cld;
|
||||
R *buf;
|
||||
INT n;
|
||||
opcnt ops;
|
||||
|
||||
static const plan_adt padt = {
|
||||
X(rdft_solve), awake, print, destroy
|
||||
};
|
||||
|
||||
if (!applicable(ego_, p_, plnr))
|
||||
return (plan *)0;
|
||||
|
||||
p = (const problem_rdft *) p_;
|
||||
|
||||
n = p->sz->dims[0].n - 1;
|
||||
A(n > 0);
|
||||
buf = (R *) MALLOC(sizeof(R) * n, BUFFERS);
|
||||
|
||||
cld = X(mkplan_d)(plnr, X(mkproblem_rdft_1_d)(X(mktensor_1d)(n, 1, 1),
|
||||
X(mktensor_0d)(),
|
||||
buf, buf, R2HC));
|
||||
X(ifree)(buf);
|
||||
if (!cld)
|
||||
return (plan *)0;
|
||||
|
||||
pln = MKPLAN_RDFT(P, &padt, apply);
|
||||
|
||||
pln->n = n;
|
||||
pln->is = p->sz->dims[0].is;
|
||||
pln->os = p->sz->dims[0].os;
|
||||
pln->cld = cld;
|
||||
pln->td = 0;
|
||||
|
||||
X(tensor_tornk1)(p->vecsz, &pln->vl, &pln->ivs, &pln->ovs);
|
||||
|
||||
X(ops_zero)(&ops);
|
||||
ops.other = 8 + (n-1)/2 * 11 + (1 - n % 2) * 5;
|
||||
ops.add = 2 + (n-1)/2 * 5;
|
||||
ops.mul = (n-1)/2 * 3 + (1 - n % 2) * 1;
|
||||
|
||||
X(ops_zero)(&pln->super.super.ops);
|
||||
X(ops_madd2)(pln->vl, &ops, &pln->super.super.ops);
|
||||
X(ops_madd2)(pln->vl, &cld->ops, &pln->super.super.ops);
|
||||
|
||||
return &(pln->super.super);
|
||||
}
|
||||
|
||||
/* constructor */
|
||||
static solver *mksolver(void)
|
||||
{
|
||||
static const solver_adt sadt = { PROBLEM_RDFT, mkplan, 0 };
|
||||
S *slv = MKSOLVER(S, &sadt);
|
||||
return &(slv->super);
|
||||
}
|
||||
|
||||
void X(redft00e_r2hc_register)(planner *p)
|
||||
{
|
||||
REGISTER_SOLVER(p, mksolver());
|
||||
}
|
||||
42
fftw-3.3.10/reodft/reodft.h
Normal file
42
fftw-3.3.10/reodft/reodft.h
Normal file
@@ -0,0 +1,42 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef __REODFT_H__
|
||||
#define __REODFT_H__
|
||||
|
||||
#include "kernel/ifftw.h"
|
||||
#include "rdft/rdft.h"
|
||||
|
||||
#define REODFT_KINDP(k) ((k) >= REDFT00 && (k) <= RODFT11)
|
||||
|
||||
void X(redft00e_r2hc_register)(planner *p);
|
||||
void X(redft00e_r2hc_pad_register)(planner *p);
|
||||
void X(rodft00e_r2hc_register)(planner *p);
|
||||
void X(rodft00e_r2hc_pad_register)(planner *p);
|
||||
void X(reodft00e_splitradix_register)(planner *p);
|
||||
void X(reodft010e_r2hc_register)(planner *p);
|
||||
void X(reodft11e_r2hc_register)(planner *p);
|
||||
void X(reodft11e_radix2_r2hc_register)(planner *p);
|
||||
void X(reodft11e_r2hc_odd_register)(planner *p);
|
||||
|
||||
/* configurations */
|
||||
void X(reodft_conf_standard)(planner *p);
|
||||
|
||||
#endif /* __REODFT_H__ */
|
||||
354
fftw-3.3.10/reodft/reodft00e-splitradix.c
Normal file
354
fftw-3.3.10/reodft/reodft00e-splitradix.c
Normal file
@@ -0,0 +1,354 @@
|
||||
/*
|
||||
* Copyright (c) 2005 Matteo Frigo
|
||||
* Copyright (c) 2005 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
/* Do an R{E,O}DFT00 problem (of an odd length n) recursively via an
|
||||
R{E,O}DFT00 problem and an RDFT problem of half the length.
|
||||
|
||||
This works by "logically" expanding the array to a real-even/odd DFT of
|
||||
length 2n-/+2 and then applying the split-radix algorithm.
|
||||
|
||||
In this way, we can avoid having to pad to twice the length
|
||||
(ala redft00-r2hc-pad), saving a factor of ~2 for n=2^m+/-1,
|
||||
but don't incur the accuracy loss that the "ordinary" algorithm
|
||||
sacrifices (ala redft00-r2hc.c).
|
||||
*/
|
||||
|
||||
#include "reodft/reodft.h"
|
||||
|
||||
typedef struct {
|
||||
solver super;
|
||||
} S;
|
||||
|
||||
typedef struct {
|
||||
plan_rdft super;
|
||||
plan *clde, *cldo;
|
||||
twid *td;
|
||||
INT is, os;
|
||||
INT n;
|
||||
INT vl;
|
||||
INT ivs, ovs;
|
||||
} P;
|
||||
|
||||
/* redft00 */
|
||||
static void apply_e(const plan *ego_, R *I, R *O)
|
||||
{
|
||||
const P *ego = (const P *) ego_;
|
||||
INT is = ego->is, os = ego->os;
|
||||
INT i, j, n = ego->n + 1, n2 = (n-1)/2;
|
||||
INT iv, vl = ego->vl;
|
||||
INT ivs = ego->ivs, ovs = ego->ovs;
|
||||
R *W = ego->td->W - 2;
|
||||
R *buf;
|
||||
|
||||
buf = (R *) MALLOC(sizeof(R) * n2, BUFFERS);
|
||||
|
||||
for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) {
|
||||
/* do size (n-1)/2 r2hc transform of odd-indexed elements
|
||||
with stride 4, "wrapping around" end of array with even
|
||||
boundary conditions */
|
||||
for (j = 0, i = 1; i < n; i += 4)
|
||||
buf[j++] = I[is * i];
|
||||
for (i = 2*n-2-i; i > 0; i -= 4)
|
||||
buf[j++] = I[is * i];
|
||||
{
|
||||
plan_rdft *cld = (plan_rdft *) ego->cldo;
|
||||
cld->apply((plan *) cld, buf, buf);
|
||||
}
|
||||
|
||||
/* do size (n+1)/2 redft00 of the even-indexed elements,
|
||||
writing to O: */
|
||||
{
|
||||
plan_rdft *cld = (plan_rdft *) ego->clde;
|
||||
cld->apply((plan *) cld, I, O);
|
||||
}
|
||||
|
||||
/* combine the results with the twiddle factors to get output */
|
||||
{ /* DC element */
|
||||
E b20 = O[0], b0 = K(2.0) * buf[0];
|
||||
O[0] = b20 + b0;
|
||||
O[2*(n2*os)] = b20 - b0;
|
||||
/* O[n2*os] = O[n2*os]; */
|
||||
}
|
||||
for (i = 1; i < n2 - i; ++i) {
|
||||
E ap, am, br, bi, wr, wi, wbr, wbi;
|
||||
br = buf[i];
|
||||
bi = buf[n2 - i];
|
||||
wr = W[2*i];
|
||||
wi = W[2*i+1];
|
||||
#if FFT_SIGN == -1
|
||||
wbr = K(2.0) * (wr*br + wi*bi);
|
||||
wbi = K(2.0) * (wr*bi - wi*br);
|
||||
#else
|
||||
wbr = K(2.0) * (wr*br - wi*bi);
|
||||
wbi = K(2.0) * (wr*bi + wi*br);
|
||||
#endif
|
||||
ap = O[i*os];
|
||||
O[i*os] = ap + wbr;
|
||||
O[(2*n2 - i)*os] = ap - wbr;
|
||||
am = O[(n2 - i)*os];
|
||||
#if FFT_SIGN == -1
|
||||
O[(n2 - i)*os] = am - wbi;
|
||||
O[(n2 + i)*os] = am + wbi;
|
||||
#else
|
||||
O[(n2 - i)*os] = am + wbi;
|
||||
O[(n2 + i)*os] = am - wbi;
|
||||
#endif
|
||||
}
|
||||
if (i == n2 - i) { /* Nyquist element */
|
||||
E ap, wbr;
|
||||
wbr = K(2.0) * (W[2*i] * buf[i]);
|
||||
ap = O[i*os];
|
||||
O[i*os] = ap + wbr;
|
||||
O[(2*n2 - i)*os] = ap - wbr;
|
||||
}
|
||||
}
|
||||
|
||||
X(ifree)(buf);
|
||||
}
|
||||
|
||||
/* rodft00 */
|
||||
static void apply_o(const plan *ego_, R *I, R *O)
|
||||
{
|
||||
const P *ego = (const P *) ego_;
|
||||
INT is = ego->is, os = ego->os;
|
||||
INT i, j, n = ego->n - 1, n2 = (n+1)/2;
|
||||
INT iv, vl = ego->vl;
|
||||
INT ivs = ego->ivs, ovs = ego->ovs;
|
||||
R *W = ego->td->W - 2;
|
||||
R *buf;
|
||||
|
||||
buf = (R *) MALLOC(sizeof(R) * n2, BUFFERS);
|
||||
|
||||
for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) {
|
||||
/* do size (n+1)/2 r2hc transform of even-indexed elements
|
||||
with stride 4, "wrapping around" end of array with odd
|
||||
boundary conditions */
|
||||
for (j = 0, i = 0; i < n; i += 4)
|
||||
buf[j++] = I[is * i];
|
||||
for (i = 2*n-i; i > 0; i -= 4)
|
||||
buf[j++] = -I[is * i];
|
||||
{
|
||||
plan_rdft *cld = (plan_rdft *) ego->cldo;
|
||||
cld->apply((plan *) cld, buf, buf);
|
||||
}
|
||||
|
||||
/* do size (n-1)/2 rodft00 of the odd-indexed elements,
|
||||
writing to O: */
|
||||
{
|
||||
plan_rdft *cld = (plan_rdft *) ego->clde;
|
||||
if (I == O) {
|
||||
/* can't use I+is and I, subplan would lose in-placeness */
|
||||
cld->apply((plan *) cld, I + is, I + is);
|
||||
/* we could maybe avoid this copy by modifying the
|
||||
twiddle loop, but currently I can't be bothered. */
|
||||
A(is >= os);
|
||||
for (i = 0; i < n2-1; ++i)
|
||||
O[os*i] = I[is*(i+1)];
|
||||
}
|
||||
else
|
||||
cld->apply((plan *) cld, I + is, O);
|
||||
}
|
||||
|
||||
/* combine the results with the twiddle factors to get output */
|
||||
O[(n2-1)*os] = K(2.0) * buf[0];
|
||||
for (i = 1; i < n2 - i; ++i) {
|
||||
E ap, am, br, bi, wr, wi, wbr, wbi;
|
||||
br = buf[i];
|
||||
bi = buf[n2 - i];
|
||||
wr = W[2*i];
|
||||
wi = W[2*i+1];
|
||||
#if FFT_SIGN == -1
|
||||
wbr = K(2.0) * (wr*br + wi*bi);
|
||||
wbi = K(2.0) * (wi*br - wr*bi);
|
||||
#else
|
||||
wbr = K(2.0) * (wr*br - wi*bi);
|
||||
wbi = K(2.0) * (wr*bi + wi*br);
|
||||
#endif
|
||||
ap = O[(i-1)*os];
|
||||
O[(i-1)*os] = wbi + ap;
|
||||
O[(2*n2-1 - i)*os] = wbi - ap;
|
||||
am = O[(n2-1 - i)*os];
|
||||
#if FFT_SIGN == -1
|
||||
O[(n2-1 - i)*os] = wbr + am;
|
||||
O[(n2-1 + i)*os] = wbr - am;
|
||||
#else
|
||||
O[(n2-1 - i)*os] = wbr + am;
|
||||
O[(n2-1 + i)*os] = wbr - am;
|
||||
#endif
|
||||
}
|
||||
if (i == n2 - i) { /* Nyquist element */
|
||||
E ap, wbi;
|
||||
wbi = K(2.0) * (W[2*i+1] * buf[i]);
|
||||
ap = O[(i-1)*os];
|
||||
O[(i-1)*os] = wbi + ap;
|
||||
O[(2*n2-1 - i)*os] = wbi - ap;
|
||||
}
|
||||
}
|
||||
|
||||
X(ifree)(buf);
|
||||
}
|
||||
|
||||
static void awake(plan *ego_, enum wakefulness wakefulness)
|
||||
{
|
||||
P *ego = (P *) ego_;
|
||||
static const tw_instr reodft00e_tw[] = {
|
||||
{ TW_COS, 1, 1 },
|
||||
{ TW_SIN, 1, 1 },
|
||||
{ TW_NEXT, 1, 0 }
|
||||
};
|
||||
|
||||
X(plan_awake)(ego->clde, wakefulness);
|
||||
X(plan_awake)(ego->cldo, wakefulness);
|
||||
X(twiddle_awake)(wakefulness, &ego->td, reodft00e_tw,
|
||||
2*ego->n, 1, ego->n/4);
|
||||
}
|
||||
|
||||
static void destroy(plan *ego_)
|
||||
{
|
||||
P *ego = (P *) ego_;
|
||||
X(plan_destroy_internal)(ego->cldo);
|
||||
X(plan_destroy_internal)(ego->clde);
|
||||
}
|
||||
|
||||
static void print(const plan *ego_, printer *p)
|
||||
{
|
||||
const P *ego = (const P *) ego_;
|
||||
if (ego->super.apply == apply_e)
|
||||
p->print(p, "(redft00e-splitradix-%D%v%(%p%)%(%p%))",
|
||||
ego->n + 1, ego->vl, ego->clde, ego->cldo);
|
||||
else
|
||||
p->print(p, "(rodft00e-splitradix-%D%v%(%p%)%(%p%))",
|
||||
ego->n - 1, ego->vl, ego->clde, ego->cldo);
|
||||
}
|
||||
|
||||
static int applicable0(const solver *ego_, const problem *p_)
|
||||
{
|
||||
const problem_rdft *p = (const problem_rdft *) p_;
|
||||
UNUSED(ego_);
|
||||
|
||||
return (1
|
||||
&& p->sz->rnk == 1
|
||||
&& p->vecsz->rnk <= 1
|
||||
&& (p->kind[0] == REDFT00 || p->kind[0] == RODFT00)
|
||||
&& p->sz->dims[0].n > 1 /* don't create size-0 sub-plans */
|
||||
&& p->sz->dims[0].n % 2 /* odd: 4 divides "logical" DFT */
|
||||
&& (p->I != p->O || p->vecsz->rnk == 0
|
||||
|| p->vecsz->dims[0].is == p->vecsz->dims[0].os)
|
||||
&& (p->kind[0] != RODFT00 || p->I != p->O ||
|
||||
p->sz->dims[0].is >= p->sz->dims[0].os) /* laziness */
|
||||
);
|
||||
}
|
||||
|
||||
static int applicable(const solver *ego, const problem *p, const planner *plnr)
|
||||
{
|
||||
return (!NO_SLOWP(plnr) && applicable0(ego, p));
|
||||
}
|
||||
|
||||
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
|
||||
{
|
||||
P *pln;
|
||||
const problem_rdft *p;
|
||||
plan *clde, *cldo;
|
||||
R *buf;
|
||||
INT n, n0;
|
||||
opcnt ops;
|
||||
int inplace_odd;
|
||||
|
||||
static const plan_adt padt = {
|
||||
X(rdft_solve), awake, print, destroy
|
||||
};
|
||||
|
||||
if (!applicable(ego_, p_, plnr))
|
||||
return (plan *)0;
|
||||
|
||||
p = (const problem_rdft *) p_;
|
||||
|
||||
n = (n0 = p->sz->dims[0].n) + (p->kind[0] == REDFT00 ? (INT)-1 : (INT)1);
|
||||
A(n > 0 && n % 2 == 0);
|
||||
buf = (R *) MALLOC(sizeof(R) * (n/2), BUFFERS);
|
||||
|
||||
inplace_odd = p->kind[0]==RODFT00 && p->I == p->O;
|
||||
clde = X(mkplan_d)(plnr, X(mkproblem_rdft_1_d)(
|
||||
X(mktensor_1d)(n0-n/2, 2*p->sz->dims[0].is,
|
||||
inplace_odd ? p->sz->dims[0].is
|
||||
: p->sz->dims[0].os),
|
||||
X(mktensor_0d)(),
|
||||
TAINT(p->I
|
||||
+ p->sz->dims[0].is * (p->kind[0]==RODFT00),
|
||||
p->vecsz->rnk ? p->vecsz->dims[0].is : 0),
|
||||
TAINT(p->O
|
||||
+ p->sz->dims[0].is * inplace_odd,
|
||||
p->vecsz->rnk ? p->vecsz->dims[0].os : 0),
|
||||
p->kind[0]));
|
||||
if (!clde) {
|
||||
X(ifree)(buf);
|
||||
return (plan *)0;
|
||||
}
|
||||
|
||||
cldo = X(mkplan_d)(plnr, X(mkproblem_rdft_1_d)(
|
||||
X(mktensor_1d)(n/2, 1, 1),
|
||||
X(mktensor_0d)(),
|
||||
buf, buf, R2HC));
|
||||
X(ifree)(buf);
|
||||
if (!cldo)
|
||||
return (plan *)0;
|
||||
|
||||
pln = MKPLAN_RDFT(P, &padt, p->kind[0] == REDFT00 ? apply_e : apply_o);
|
||||
|
||||
pln->n = n;
|
||||
pln->is = p->sz->dims[0].is;
|
||||
pln->os = p->sz->dims[0].os;
|
||||
pln->clde = clde;
|
||||
pln->cldo = cldo;
|
||||
pln->td = 0;
|
||||
|
||||
X(tensor_tornk1)(p->vecsz, &pln->vl, &pln->ivs, &pln->ovs);
|
||||
|
||||
X(ops_zero)(&ops);
|
||||
ops.other = n/2;
|
||||
ops.add = (p->kind[0]==REDFT00 ? (INT)2 : (INT)0) +
|
||||
(n/2-1)/2 * 6 + ((n/2)%2==0) * 2;
|
||||
ops.mul = 1 + (n/2-1)/2 * 6 + ((n/2)%2==0) * 2;
|
||||
|
||||
/* tweak ops.other so that r2hc-pad is used for small sizes, which
|
||||
seems to be a lot faster on my machine: */
|
||||
ops.other += 256;
|
||||
|
||||
X(ops_zero)(&pln->super.super.ops);
|
||||
X(ops_madd2)(pln->vl, &ops, &pln->super.super.ops);
|
||||
X(ops_madd2)(pln->vl, &clde->ops, &pln->super.super.ops);
|
||||
X(ops_madd2)(pln->vl, &cldo->ops, &pln->super.super.ops);
|
||||
|
||||
return &(pln->super.super);
|
||||
}
|
||||
|
||||
/* constructor */
|
||||
static solver *mksolver(void)
|
||||
{
|
||||
static const solver_adt sadt = { PROBLEM_RDFT, mkplan, 0 };
|
||||
S *slv = MKSOLVER(S, &sadt);
|
||||
return &(slv->super);
|
||||
}
|
||||
|
||||
void X(reodft00e_splitradix_register)(planner *p)
|
||||
{
|
||||
REGISTER_SOLVER(p, mksolver());
|
||||
}
|
||||
410
fftw-3.3.10/reodft/reodft010e-r2hc.c
Normal file
410
fftw-3.3.10/reodft/reodft010e-r2hc.c
Normal file
@@ -0,0 +1,410 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
/* Do an R{E,O}DFT{01,10} problem via an R2HC problem, with some
|
||||
pre/post-processing ala FFTPACK. */
|
||||
|
||||
#include "reodft/reodft.h"
|
||||
|
||||
typedef struct {
|
||||
solver super;
|
||||
} S;
|
||||
|
||||
typedef struct {
|
||||
plan_rdft super;
|
||||
plan *cld;
|
||||
twid *td;
|
||||
INT is, os;
|
||||
INT n;
|
||||
INT vl;
|
||||
INT ivs, ovs;
|
||||
rdft_kind kind;
|
||||
} P;
|
||||
|
||||
/* A real-even-01 DFT operates logically on a size-4N array:
|
||||
I 0 -r(I*) -I 0 r(I*),
|
||||
where r denotes reversal and * denotes deletion of the 0th element.
|
||||
To compute the transform of this, we imagine performing a radix-4
|
||||
(real-input) DIF step, which turns the size-4N DFT into 4 size-N
|
||||
(contiguous) DFTs, two of which are zero and two of which are
|
||||
conjugates. The non-redundant size-N DFT has halfcomplex input, so
|
||||
we can do it with a size-N hc2r transform. (In order to share
|
||||
plans with the re10 (inverse) transform, however, we use the DHT
|
||||
trick to re-express the hc2r problem as r2hc. This has little cost
|
||||
since we are already pre- and post-processing the data in {i,n-i}
|
||||
order.) Finally, we have to write out the data in the correct
|
||||
order...the two size-N redundant (conjugate) hc2r DFTs correspond
|
||||
to the even and odd outputs in O (i.e. the usual interleaved output
|
||||
of DIF transforms); since this data has even symmetry, we only
|
||||
write the first half of it.
|
||||
|
||||
The real-even-10 DFT is just the reverse of these steps, i.e. a
|
||||
radix-4 DIT transform. There, however, we just use the r2hc
|
||||
transform naturally without resorting to the DHT trick.
|
||||
|
||||
A real-odd-01 DFT is very similar, except that the input is
|
||||
0 I (rI)* 0 -I -(rI)*. This format, however, can be transformed
|
||||
into precisely the real-even-01 format above by sending I -> rI
|
||||
and shifting the array by N. The former swap is just another
|
||||
transformation on the input during preprocessing; the latter
|
||||
multiplies the even/odd outputs by i/-i, which combines with
|
||||
the factor of -i (to take the imaginary part) to simply flip
|
||||
the sign of the odd outputs. Vice-versa for real-odd-10.
|
||||
|
||||
The FFTPACK source code was very helpful in working this out.
|
||||
(They do unnecessary passes over the array, though.) The same
|
||||
algorithm is also described in:
|
||||
|
||||
John Makhoul, "A fast cosine transform in one and two dimensions,"
|
||||
IEEE Trans. on Acoust. Speech and Sig. Proc., ASSP-28 (1), 27--34 (1980).
|
||||
|
||||
Note that Numerical Recipes suggests a different algorithm that
|
||||
requires more operations and uses trig. functions for both the pre-
|
||||
and post-processing passes.
|
||||
*/
|
||||
|
||||
static void apply_re01(const plan *ego_, R *I, R *O)
|
||||
{
|
||||
const P *ego = (const P *) ego_;
|
||||
INT is = ego->is, os = ego->os;
|
||||
INT i, n = ego->n;
|
||||
INT iv, vl = ego->vl;
|
||||
INT ivs = ego->ivs, ovs = ego->ovs;
|
||||
R *W = ego->td->W;
|
||||
R *buf;
|
||||
|
||||
buf = (R *) MALLOC(sizeof(R) * n, BUFFERS);
|
||||
|
||||
for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) {
|
||||
buf[0] = I[0];
|
||||
for (i = 1; i < n - i; ++i) {
|
||||
E a, b, apb, amb, wa, wb;
|
||||
a = I[is * i];
|
||||
b = I[is * (n - i)];
|
||||
apb = a + b;
|
||||
amb = a - b;
|
||||
wa = W[2*i];
|
||||
wb = W[2*i + 1];
|
||||
buf[i] = wa * amb + wb * apb;
|
||||
buf[n - i] = wa * apb - wb * amb;
|
||||
}
|
||||
if (i == n - i) {
|
||||
buf[i] = K(2.0) * I[is * i] * W[2*i];
|
||||
}
|
||||
|
||||
{
|
||||
plan_rdft *cld = (plan_rdft *) ego->cld;
|
||||
cld->apply((plan *) cld, buf, buf);
|
||||
}
|
||||
|
||||
O[0] = buf[0];
|
||||
for (i = 1; i < n - i; ++i) {
|
||||
E a, b;
|
||||
INT k;
|
||||
a = buf[i];
|
||||
b = buf[n - i];
|
||||
k = i + i;
|
||||
O[os * (k - 1)] = a - b;
|
||||
O[os * k] = a + b;
|
||||
}
|
||||
if (i == n - i) {
|
||||
O[os * (n - 1)] = buf[i];
|
||||
}
|
||||
}
|
||||
|
||||
X(ifree)(buf);
|
||||
}
|
||||
|
||||
/* ro01 is same as re01, but with i <-> n - 1 - i in the input and
|
||||
the sign of the odd output elements flipped. */
|
||||
static void apply_ro01(const plan *ego_, R *I, R *O)
|
||||
{
|
||||
const P *ego = (const P *) ego_;
|
||||
INT is = ego->is, os = ego->os;
|
||||
INT i, n = ego->n;
|
||||
INT iv, vl = ego->vl;
|
||||
INT ivs = ego->ivs, ovs = ego->ovs;
|
||||
R *W = ego->td->W;
|
||||
R *buf;
|
||||
|
||||
buf = (R *) MALLOC(sizeof(R) * n, BUFFERS);
|
||||
|
||||
for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) {
|
||||
buf[0] = I[is * (n - 1)];
|
||||
for (i = 1; i < n - i; ++i) {
|
||||
E a, b, apb, amb, wa, wb;
|
||||
a = I[is * (n - 1 - i)];
|
||||
b = I[is * (i - 1)];
|
||||
apb = a + b;
|
||||
amb = a - b;
|
||||
wa = W[2*i];
|
||||
wb = W[2*i+1];
|
||||
buf[i] = wa * amb + wb * apb;
|
||||
buf[n - i] = wa * apb - wb * amb;
|
||||
}
|
||||
if (i == n - i) {
|
||||
buf[i] = K(2.0) * I[is * (i - 1)] * W[2*i];
|
||||
}
|
||||
|
||||
{
|
||||
plan_rdft *cld = (plan_rdft *) ego->cld;
|
||||
cld->apply((plan *) cld, buf, buf);
|
||||
}
|
||||
|
||||
O[0] = buf[0];
|
||||
for (i = 1; i < n - i; ++i) {
|
||||
E a, b;
|
||||
INT k;
|
||||
a = buf[i];
|
||||
b = buf[n - i];
|
||||
k = i + i;
|
||||
O[os * (k - 1)] = b - a;
|
||||
O[os * k] = a + b;
|
||||
}
|
||||
if (i == n - i) {
|
||||
O[os * (n - 1)] = -buf[i];
|
||||
}
|
||||
}
|
||||
|
||||
X(ifree)(buf);
|
||||
}
|
||||
|
||||
static void apply_re10(const plan *ego_, R *I, R *O)
|
||||
{
|
||||
const P *ego = (const P *) ego_;
|
||||
INT is = ego->is, os = ego->os;
|
||||
INT i, n = ego->n;
|
||||
INT iv, vl = ego->vl;
|
||||
INT ivs = ego->ivs, ovs = ego->ovs;
|
||||
R *W = ego->td->W;
|
||||
R *buf;
|
||||
|
||||
buf = (R *) MALLOC(sizeof(R) * n, BUFFERS);
|
||||
|
||||
for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) {
|
||||
buf[0] = I[0];
|
||||
for (i = 1; i < n - i; ++i) {
|
||||
E u, v;
|
||||
INT k = i + i;
|
||||
u = I[is * (k - 1)];
|
||||
v = I[is * k];
|
||||
buf[n - i] = u;
|
||||
buf[i] = v;
|
||||
}
|
||||
if (i == n - i) {
|
||||
buf[i] = I[is * (n - 1)];
|
||||
}
|
||||
|
||||
{
|
||||
plan_rdft *cld = (plan_rdft *) ego->cld;
|
||||
cld->apply((plan *) cld, buf, buf);
|
||||
}
|
||||
|
||||
O[0] = K(2.0) * buf[0];
|
||||
for (i = 1; i < n - i; ++i) {
|
||||
E a, b, wa, wb;
|
||||
a = K(2.0) * buf[i];
|
||||
b = K(2.0) * buf[n - i];
|
||||
wa = W[2*i];
|
||||
wb = W[2*i + 1];
|
||||
O[os * i] = wa * a + wb * b;
|
||||
O[os * (n - i)] = wb * a - wa * b;
|
||||
}
|
||||
if (i == n - i) {
|
||||
O[os * i] = K(2.0) * buf[i] * W[2*i];
|
||||
}
|
||||
}
|
||||
|
||||
X(ifree)(buf);
|
||||
}
|
||||
|
||||
/* ro10 is same as re10, but with i <-> n - 1 - i in the output and
|
||||
the sign of the odd input elements flipped. */
|
||||
static void apply_ro10(const plan *ego_, R *I, R *O)
|
||||
{
|
||||
const P *ego = (const P *) ego_;
|
||||
INT is = ego->is, os = ego->os;
|
||||
INT i, n = ego->n;
|
||||
INT iv, vl = ego->vl;
|
||||
INT ivs = ego->ivs, ovs = ego->ovs;
|
||||
R *W = ego->td->W;
|
||||
R *buf;
|
||||
|
||||
buf = (R *) MALLOC(sizeof(R) * n, BUFFERS);
|
||||
|
||||
for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) {
|
||||
buf[0] = I[0];
|
||||
for (i = 1; i < n - i; ++i) {
|
||||
E u, v;
|
||||
INT k = i + i;
|
||||
u = -I[is * (k - 1)];
|
||||
v = I[is * k];
|
||||
buf[n - i] = u;
|
||||
buf[i] = v;
|
||||
}
|
||||
if (i == n - i) {
|
||||
buf[i] = -I[is * (n - 1)];
|
||||
}
|
||||
|
||||
{
|
||||
plan_rdft *cld = (plan_rdft *) ego->cld;
|
||||
cld->apply((plan *) cld, buf, buf);
|
||||
}
|
||||
|
||||
O[os * (n - 1)] = K(2.0) * buf[0];
|
||||
for (i = 1; i < n - i; ++i) {
|
||||
E a, b, wa, wb;
|
||||
a = K(2.0) * buf[i];
|
||||
b = K(2.0) * buf[n - i];
|
||||
wa = W[2*i];
|
||||
wb = W[2*i + 1];
|
||||
O[os * (n - 1 - i)] = wa * a + wb * b;
|
||||
O[os * (i - 1)] = wb * a - wa * b;
|
||||
}
|
||||
if (i == n - i) {
|
||||
O[os * (i - 1)] = K(2.0) * buf[i] * W[2*i];
|
||||
}
|
||||
}
|
||||
|
||||
X(ifree)(buf);
|
||||
}
|
||||
|
||||
static void awake(plan *ego_, enum wakefulness wakefulness)
|
||||
{
|
||||
P *ego = (P *) ego_;
|
||||
static const tw_instr reodft010e_tw[] = {
|
||||
{ TW_COS, 0, 1 },
|
||||
{ TW_SIN, 0, 1 },
|
||||
{ TW_NEXT, 1, 0 }
|
||||
};
|
||||
|
||||
X(plan_awake)(ego->cld, wakefulness);
|
||||
|
||||
X(twiddle_awake)(wakefulness, &ego->td, reodft010e_tw,
|
||||
4*ego->n, 1, ego->n/2+1);
|
||||
}
|
||||
|
||||
static void destroy(plan *ego_)
|
||||
{
|
||||
P *ego = (P *) ego_;
|
||||
X(plan_destroy_internal)(ego->cld);
|
||||
}
|
||||
|
||||
static void print(const plan *ego_, printer *p)
|
||||
{
|
||||
const P *ego = (const P *) ego_;
|
||||
p->print(p, "(%se-r2hc-%D%v%(%p%))",
|
||||
X(rdft_kind_str)(ego->kind), ego->n, ego->vl, ego->cld);
|
||||
}
|
||||
|
||||
static int applicable0(const solver *ego_, const problem *p_)
|
||||
{
|
||||
const problem_rdft *p = (const problem_rdft *) p_;
|
||||
UNUSED(ego_);
|
||||
|
||||
return (1
|
||||
&& p->sz->rnk == 1
|
||||
&& p->vecsz->rnk <= 1
|
||||
&& (p->kind[0] == REDFT01 || p->kind[0] == REDFT10
|
||||
|| p->kind[0] == RODFT01 || p->kind[0] == RODFT10)
|
||||
);
|
||||
}
|
||||
|
||||
static int applicable(const solver *ego, const problem *p, const planner *plnr)
|
||||
{
|
||||
return (!NO_SLOWP(plnr) && applicable0(ego, p));
|
||||
}
|
||||
|
||||
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
|
||||
{
|
||||
P *pln;
|
||||
const problem_rdft *p;
|
||||
plan *cld;
|
||||
R *buf;
|
||||
INT n;
|
||||
opcnt ops;
|
||||
|
||||
static const plan_adt padt = {
|
||||
X(rdft_solve), awake, print, destroy
|
||||
};
|
||||
|
||||
if (!applicable(ego_, p_, plnr))
|
||||
return (plan *)0;
|
||||
|
||||
p = (const problem_rdft *) p_;
|
||||
|
||||
n = p->sz->dims[0].n;
|
||||
buf = (R *) MALLOC(sizeof(R) * n, BUFFERS);
|
||||
|
||||
cld = X(mkplan_d)(plnr, X(mkproblem_rdft_1_d)(X(mktensor_1d)(n, 1, 1),
|
||||
X(mktensor_0d)(),
|
||||
buf, buf, R2HC));
|
||||
X(ifree)(buf);
|
||||
if (!cld)
|
||||
return (plan *)0;
|
||||
|
||||
switch (p->kind[0]) {
|
||||
case REDFT01: pln = MKPLAN_RDFT(P, &padt, apply_re01); break;
|
||||
case REDFT10: pln = MKPLAN_RDFT(P, &padt, apply_re10); break;
|
||||
case RODFT01: pln = MKPLAN_RDFT(P, &padt, apply_ro01); break;
|
||||
case RODFT10: pln = MKPLAN_RDFT(P, &padt, apply_ro10); break;
|
||||
default: A(0); return (plan*)0;
|
||||
}
|
||||
|
||||
pln->n = n;
|
||||
pln->is = p->sz->dims[0].is;
|
||||
pln->os = p->sz->dims[0].os;
|
||||
pln->cld = cld;
|
||||
pln->td = 0;
|
||||
pln->kind = p->kind[0];
|
||||
|
||||
X(tensor_tornk1)(p->vecsz, &pln->vl, &pln->ivs, &pln->ovs);
|
||||
|
||||
X(ops_zero)(&ops);
|
||||
ops.other = 4 + (n-1)/2 * 10 + (1 - n % 2) * 5;
|
||||
if (p->kind[0] == REDFT01 || p->kind[0] == RODFT01) {
|
||||
ops.add = (n-1)/2 * 6;
|
||||
ops.mul = (n-1)/2 * 4 + (1 - n % 2) * 2;
|
||||
}
|
||||
else { /* 10 transforms */
|
||||
ops.add = (n-1)/2 * 2;
|
||||
ops.mul = 1 + (n-1)/2 * 6 + (1 - n % 2) * 2;
|
||||
}
|
||||
|
||||
X(ops_zero)(&pln->super.super.ops);
|
||||
X(ops_madd2)(pln->vl, &ops, &pln->super.super.ops);
|
||||
X(ops_madd2)(pln->vl, &cld->ops, &pln->super.super.ops);
|
||||
|
||||
return &(pln->super.super);
|
||||
}
|
||||
|
||||
/* constructor */
|
||||
static solver *mksolver(void)
|
||||
{
|
||||
static const solver_adt sadt = { PROBLEM_RDFT, mkplan, 0 };
|
||||
S *slv = MKSOLVER(S, &sadt);
|
||||
return &(slv->super);
|
||||
}
|
||||
|
||||
void X(reodft010e_r2hc_register)(planner *p)
|
||||
{
|
||||
REGISTER_SOLVER(p, mksolver());
|
||||
}
|
||||
300
fftw-3.3.10/reodft/reodft11e-r2hc-odd.c
Normal file
300
fftw-3.3.10/reodft/reodft11e-r2hc-odd.c
Normal file
@@ -0,0 +1,300 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
/* Do an R{E,O}DFT11 problem via an R2HC problem of the same *odd* size,
|
||||
with some permutations and post-processing, as described in:
|
||||
|
||||
S. C. Chan and K. L. Ho, "Fast algorithms for computing the
|
||||
discrete cosine transform," IEEE Trans. Circuits Systems II:
|
||||
Analog & Digital Sig. Proc. 39 (3), 185--190 (1992).
|
||||
|
||||
(For even sizes, see reodft11e-radix2.c.)
|
||||
|
||||
This algorithm is related to the 8 x n prime-factor-algorithm (PFA)
|
||||
decomposition of the size 8n "logical" DFT corresponding to the
|
||||
R{EO}DFT11.
|
||||
|
||||
Aside from very confusing notation (several symbols are redefined
|
||||
from one line to the next), be aware that this paper has some
|
||||
errors. In particular, the signs are wrong in Eqs. (34-35). Also,
|
||||
Eqs. (36-37) should be simply C(k) = C(2k + 1 mod N), and similarly
|
||||
for S (or, equivalently, the second cases should have 2*N - 2*k - 1
|
||||
instead of N - k - 1). Note also that in their definition of the
|
||||
DFT, similarly to FFTW's, the exponent's sign is -1, but they
|
||||
forgot to correspondingly multiply S (the sine terms) by -1.
|
||||
*/
|
||||
|
||||
#include "reodft/reodft.h"
|
||||
|
||||
typedef struct {
|
||||
solver super;
|
||||
} S;
|
||||
|
||||
typedef struct {
|
||||
plan_rdft super;
|
||||
plan *cld;
|
||||
INT is, os;
|
||||
INT n;
|
||||
INT vl;
|
||||
INT ivs, ovs;
|
||||
rdft_kind kind;
|
||||
} P;
|
||||
|
||||
static DK(SQRT2, +1.4142135623730950488016887242096980785696718753769);
|
||||
|
||||
#define SGN_SET(x, i) ((i) % 2 ? -(x) : (x))
|
||||
|
||||
static void apply_re11(const plan *ego_, R *I, R *O)
|
||||
{
|
||||
const P *ego = (const P *) ego_;
|
||||
INT is = ego->is, os = ego->os;
|
||||
INT i, n = ego->n, n2 = n/2;
|
||||
INT iv, vl = ego->vl;
|
||||
INT ivs = ego->ivs, ovs = ego->ovs;
|
||||
R *buf;
|
||||
|
||||
buf = (R *) MALLOC(sizeof(R) * n, BUFFERS);
|
||||
|
||||
for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) {
|
||||
{
|
||||
INT m;
|
||||
for (i = 0, m = n2; m < n; ++i, m += 4)
|
||||
buf[i] = I[is * m];
|
||||
for (; m < 2 * n; ++i, m += 4)
|
||||
buf[i] = -I[is * (2*n - m - 1)];
|
||||
for (; m < 3 * n; ++i, m += 4)
|
||||
buf[i] = -I[is * (m - 2*n)];
|
||||
for (; m < 4 * n; ++i, m += 4)
|
||||
buf[i] = I[is * (4*n - m - 1)];
|
||||
m -= 4 * n;
|
||||
for (; i < n; ++i, m += 4)
|
||||
buf[i] = I[is * m];
|
||||
}
|
||||
|
||||
{ /* child plan: R2HC of size n */
|
||||
plan_rdft *cld = (plan_rdft *) ego->cld;
|
||||
cld->apply((plan *) cld, buf, buf);
|
||||
}
|
||||
|
||||
/* FIXME: strength-reduce loop by 4 to eliminate ugly sgn_set? */
|
||||
for (i = 0; i + i + 1 < n2; ++i) {
|
||||
INT k = i + i + 1;
|
||||
E c1, s1;
|
||||
E c2, s2;
|
||||
c1 = buf[k];
|
||||
c2 = buf[k + 1];
|
||||
s2 = buf[n - (k + 1)];
|
||||
s1 = buf[n - k];
|
||||
|
||||
O[os * i] = SQRT2 * (SGN_SET(c1, (i+1)/2) +
|
||||
SGN_SET(s1, i/2));
|
||||
O[os * (n - (i+1))] = SQRT2 * (SGN_SET(c1, (n-i)/2) -
|
||||
SGN_SET(s1, (n-(i+1))/2));
|
||||
|
||||
O[os * (n2 - (i+1))] = SQRT2 * (SGN_SET(c2, (n2-i)/2) -
|
||||
SGN_SET(s2, (n2-(i+1))/2));
|
||||
O[os * (n2 + (i+1))] = SQRT2 * (SGN_SET(c2, (n2+i+2)/2) +
|
||||
SGN_SET(s2, (n2+(i+1))/2));
|
||||
}
|
||||
if (i + i + 1 == n2) {
|
||||
E c, s;
|
||||
c = buf[n2];
|
||||
s = buf[n - n2];
|
||||
O[os * i] = SQRT2 * (SGN_SET(c, (i+1)/2) +
|
||||
SGN_SET(s, i/2));
|
||||
O[os * (n - (i+1))] = SQRT2 * (SGN_SET(c, (i+2)/2) +
|
||||
SGN_SET(s, (i+1)/2));
|
||||
}
|
||||
O[os * n2] = SQRT2 * SGN_SET(buf[0], (n2+1)/2);
|
||||
}
|
||||
|
||||
X(ifree)(buf);
|
||||
}
|
||||
|
||||
/* like for rodft01, rodft11 is obtained from redft11 by
|
||||
reversing the input and flipping the sign of every other output. */
|
||||
static void apply_ro11(const plan *ego_, R *I, R *O)
|
||||
{
|
||||
const P *ego = (const P *) ego_;
|
||||
INT is = ego->is, os = ego->os;
|
||||
INT i, n = ego->n, n2 = n/2;
|
||||
INT iv, vl = ego->vl;
|
||||
INT ivs = ego->ivs, ovs = ego->ovs;
|
||||
R *buf;
|
||||
|
||||
buf = (R *) MALLOC(sizeof(R) * n, BUFFERS);
|
||||
|
||||
for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) {
|
||||
{
|
||||
INT m;
|
||||
for (i = 0, m = n2; m < n; ++i, m += 4)
|
||||
buf[i] = I[is * (n - 1 - m)];
|
||||
for (; m < 2 * n; ++i, m += 4)
|
||||
buf[i] = -I[is * (m - n)];
|
||||
for (; m < 3 * n; ++i, m += 4)
|
||||
buf[i] = -I[is * (3*n - 1 - m)];
|
||||
for (; m < 4 * n; ++i, m += 4)
|
||||
buf[i] = I[is * (m - 3*n)];
|
||||
m -= 4 * n;
|
||||
for (; i < n; ++i, m += 4)
|
||||
buf[i] = I[is * (n - 1 - m)];
|
||||
}
|
||||
|
||||
{ /* child plan: R2HC of size n */
|
||||
plan_rdft *cld = (plan_rdft *) ego->cld;
|
||||
cld->apply((plan *) cld, buf, buf);
|
||||
}
|
||||
|
||||
/* FIXME: strength-reduce loop by 4 to eliminate ugly sgn_set? */
|
||||
for (i = 0; i + i + 1 < n2; ++i) {
|
||||
INT k = i + i + 1;
|
||||
INT j;
|
||||
E c1, s1;
|
||||
E c2, s2;
|
||||
c1 = buf[k];
|
||||
c2 = buf[k + 1];
|
||||
s2 = buf[n - (k + 1)];
|
||||
s1 = buf[n - k];
|
||||
|
||||
O[os * i] = SQRT2 * (SGN_SET(c1, (i+1)/2 + i) +
|
||||
SGN_SET(s1, i/2 + i));
|
||||
O[os * (n - (i+1))] = SQRT2 * (SGN_SET(c1, (n-i)/2 + i) -
|
||||
SGN_SET(s1, (n-(i+1))/2 + i));
|
||||
|
||||
j = n2 - (i+1);
|
||||
O[os * j] = SQRT2 * (SGN_SET(c2, (n2-i)/2 + j) -
|
||||
SGN_SET(s2, (n2-(i+1))/2 + j));
|
||||
O[os * (n2 + (i+1))] = SQRT2 * (SGN_SET(c2, (n2+i+2)/2 + j) +
|
||||
SGN_SET(s2, (n2+(i+1))/2 + j));
|
||||
}
|
||||
if (i + i + 1 == n2) {
|
||||
E c, s;
|
||||
c = buf[n2];
|
||||
s = buf[n - n2];
|
||||
O[os * i] = SQRT2 * (SGN_SET(c, (i+1)/2 + i) +
|
||||
SGN_SET(s, i/2 + i));
|
||||
O[os * (n - (i+1))] = SQRT2 * (SGN_SET(c, (i+2)/2 + i) +
|
||||
SGN_SET(s, (i+1)/2 + i));
|
||||
}
|
||||
O[os * n2] = SQRT2 * SGN_SET(buf[0], (n2+1)/2 + n2);
|
||||
}
|
||||
|
||||
X(ifree)(buf);
|
||||
}
|
||||
|
||||
static void awake(plan *ego_, enum wakefulness wakefulness)
|
||||
{
|
||||
P *ego = (P *) ego_;
|
||||
X(plan_awake)(ego->cld, wakefulness);
|
||||
}
|
||||
|
||||
static void destroy(plan *ego_)
|
||||
{
|
||||
P *ego = (P *) ego_;
|
||||
X(plan_destroy_internal)(ego->cld);
|
||||
}
|
||||
|
||||
static void print(const plan *ego_, printer *p)
|
||||
{
|
||||
const P *ego = (const P *) ego_;
|
||||
p->print(p, "(%se-r2hc-odd-%D%v%(%p%))",
|
||||
X(rdft_kind_str)(ego->kind), ego->n, ego->vl, ego->cld);
|
||||
}
|
||||
|
||||
static int applicable0(const solver *ego_, const problem *p_)
|
||||
{
|
||||
const problem_rdft *p = (const problem_rdft *) p_;
|
||||
UNUSED(ego_);
|
||||
|
||||
return (1
|
||||
&& p->sz->rnk == 1
|
||||
&& p->vecsz->rnk <= 1
|
||||
&& p->sz->dims[0].n % 2 == 1
|
||||
&& (p->kind[0] == REDFT11 || p->kind[0] == RODFT11)
|
||||
);
|
||||
}
|
||||
|
||||
static int applicable(const solver *ego, const problem *p, const planner *plnr)
|
||||
{
|
||||
return (!NO_SLOWP(plnr) && applicable0(ego, p));
|
||||
}
|
||||
|
||||
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
|
||||
{
|
||||
P *pln;
|
||||
const problem_rdft *p;
|
||||
plan *cld;
|
||||
R *buf;
|
||||
INT n;
|
||||
opcnt ops;
|
||||
|
||||
static const plan_adt padt = {
|
||||
X(rdft_solve), awake, print, destroy
|
||||
};
|
||||
|
||||
if (!applicable(ego_, p_, plnr))
|
||||
return (plan *)0;
|
||||
|
||||
p = (const problem_rdft *) p_;
|
||||
|
||||
n = p->sz->dims[0].n;
|
||||
buf = (R *) MALLOC(sizeof(R) * n, BUFFERS);
|
||||
|
||||
cld = X(mkplan_d)(plnr, X(mkproblem_rdft_1_d)(X(mktensor_1d)(n, 1, 1),
|
||||
X(mktensor_0d)(),
|
||||
buf, buf, R2HC));
|
||||
X(ifree)(buf);
|
||||
if (!cld)
|
||||
return (plan *)0;
|
||||
|
||||
pln = MKPLAN_RDFT(P, &padt, p->kind[0]==REDFT11 ? apply_re11:apply_ro11);
|
||||
pln->n = n;
|
||||
pln->is = p->sz->dims[0].is;
|
||||
pln->os = p->sz->dims[0].os;
|
||||
pln->cld = cld;
|
||||
pln->kind = p->kind[0];
|
||||
|
||||
X(tensor_tornk1)(p->vecsz, &pln->vl, &pln->ivs, &pln->ovs);
|
||||
|
||||
X(ops_zero)(&ops);
|
||||
ops.add = n - 1;
|
||||
ops.mul = n;
|
||||
ops.other = 4*n;
|
||||
|
||||
X(ops_zero)(&pln->super.super.ops);
|
||||
X(ops_madd2)(pln->vl, &ops, &pln->super.super.ops);
|
||||
X(ops_madd2)(pln->vl, &cld->ops, &pln->super.super.ops);
|
||||
|
||||
return &(pln->super.super);
|
||||
}
|
||||
|
||||
/* constructor */
|
||||
static solver *mksolver(void)
|
||||
{
|
||||
static const solver_adt sadt = { PROBLEM_RDFT, mkplan, 0 };
|
||||
S *slv = MKSOLVER(S, &sadt);
|
||||
return &(slv->super);
|
||||
}
|
||||
|
||||
void X(reodft11e_r2hc_odd_register)(planner *p)
|
||||
{
|
||||
REGISTER_SOLVER(p, mksolver());
|
||||
}
|
||||
294
fftw-3.3.10/reodft/reodft11e-r2hc.c
Normal file
294
fftw-3.3.10/reodft/reodft11e-r2hc.c
Normal file
@@ -0,0 +1,294 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
/* Do an R{E,O}DFT11 problem via an R2HC problem, with some
|
||||
pre/post-processing ala FFTPACK. Use a trick from:
|
||||
|
||||
S. C. Chan and K. L. Ho, "Direct methods for computing discrete
|
||||
sinusoidal transforms," IEE Proceedings F 137 (6), 433--442 (1990).
|
||||
|
||||
to re-express as an REDFT01 (DCT-III) problem.
|
||||
|
||||
NOTE: We no longer use this algorithm, because it turns out to suffer
|
||||
a catastrophic loss of accuracy for certain inputs, apparently because
|
||||
its post-processing multiplies the output by a cosine. Near the zero
|
||||
of the cosine, the REDFT01 must produce a near-singular output.
|
||||
*/
|
||||
|
||||
#include "reodft/reodft.h"
|
||||
|
||||
typedef struct {
|
||||
solver super;
|
||||
} S;
|
||||
|
||||
typedef struct {
|
||||
plan_rdft super;
|
||||
plan *cld;
|
||||
twid *td, *td2;
|
||||
INT is, os;
|
||||
INT n;
|
||||
INT vl;
|
||||
INT ivs, ovs;
|
||||
rdft_kind kind;
|
||||
} P;
|
||||
|
||||
static void apply_re11(const plan *ego_, R *I, R *O)
|
||||
{
|
||||
const P *ego = (const P *) ego_;
|
||||
INT is = ego->is, os = ego->os;
|
||||
INT i, n = ego->n;
|
||||
INT iv, vl = ego->vl;
|
||||
INT ivs = ego->ivs, ovs = ego->ovs;
|
||||
R *W;
|
||||
R *buf;
|
||||
E cur;
|
||||
|
||||
buf = (R *) MALLOC(sizeof(R) * n, BUFFERS);
|
||||
|
||||
for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) {
|
||||
/* I wish that this didn't require an extra pass. */
|
||||
/* FIXME: use recursive/cascade summation for better stability? */
|
||||
buf[n - 1] = cur = K(2.0) * I[is * (n - 1)];
|
||||
for (i = n - 1; i > 0; --i) {
|
||||
E curnew;
|
||||
buf[(i - 1)] = curnew = K(2.0) * I[is * (i - 1)] - cur;
|
||||
cur = curnew;
|
||||
}
|
||||
|
||||
W = ego->td->W;
|
||||
for (i = 1; i < n - i; ++i) {
|
||||
E a, b, apb, amb, wa, wb;
|
||||
a = buf[i];
|
||||
b = buf[n - i];
|
||||
apb = a + b;
|
||||
amb = a - b;
|
||||
wa = W[2*i];
|
||||
wb = W[2*i + 1];
|
||||
buf[i] = wa * amb + wb * apb;
|
||||
buf[n - i] = wa * apb - wb * amb;
|
||||
}
|
||||
if (i == n - i) {
|
||||
buf[i] = K(2.0) * buf[i] * W[2*i];
|
||||
}
|
||||
|
||||
{
|
||||
plan_rdft *cld = (plan_rdft *) ego->cld;
|
||||
cld->apply((plan *) cld, buf, buf);
|
||||
}
|
||||
|
||||
W = ego->td2->W;
|
||||
O[0] = W[0] * buf[0];
|
||||
for (i = 1; i < n - i; ++i) {
|
||||
E a, b;
|
||||
INT k;
|
||||
a = buf[i];
|
||||
b = buf[n - i];
|
||||
k = i + i;
|
||||
O[os * (k - 1)] = W[k - 1] * (a - b);
|
||||
O[os * k] = W[k] * (a + b);
|
||||
}
|
||||
if (i == n - i) {
|
||||
O[os * (n - 1)] = W[n - 1] * buf[i];
|
||||
}
|
||||
}
|
||||
|
||||
X(ifree)(buf);
|
||||
}
|
||||
|
||||
/* like for rodft01, rodft11 is obtained from redft11 by
|
||||
reversing the input and flipping the sign of every other output. */
|
||||
static void apply_ro11(const plan *ego_, R *I, R *O)
|
||||
{
|
||||
const P *ego = (const P *) ego_;
|
||||
INT is = ego->is, os = ego->os;
|
||||
INT i, n = ego->n;
|
||||
INT iv, vl = ego->vl;
|
||||
INT ivs = ego->ivs, ovs = ego->ovs;
|
||||
R *W;
|
||||
R *buf;
|
||||
E cur;
|
||||
|
||||
buf = (R *) MALLOC(sizeof(R) * n, BUFFERS);
|
||||
|
||||
for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) {
|
||||
/* I wish that this didn't require an extra pass. */
|
||||
/* FIXME: use recursive/cascade summation for better stability? */
|
||||
buf[n - 1] = cur = K(2.0) * I[0];
|
||||
for (i = n - 1; i > 0; --i) {
|
||||
E curnew;
|
||||
buf[(i - 1)] = curnew = K(2.0) * I[is * (n - i)] - cur;
|
||||
cur = curnew;
|
||||
}
|
||||
|
||||
W = ego->td->W;
|
||||
for (i = 1; i < n - i; ++i) {
|
||||
E a, b, apb, amb, wa, wb;
|
||||
a = buf[i];
|
||||
b = buf[n - i];
|
||||
apb = a + b;
|
||||
amb = a - b;
|
||||
wa = W[2*i];
|
||||
wb = W[2*i + 1];
|
||||
buf[i] = wa * amb + wb * apb;
|
||||
buf[n - i] = wa * apb - wb * amb;
|
||||
}
|
||||
if (i == n - i) {
|
||||
buf[i] = K(2.0) * buf[i] * W[2*i];
|
||||
}
|
||||
|
||||
{
|
||||
plan_rdft *cld = (plan_rdft *) ego->cld;
|
||||
cld->apply((plan *) cld, buf, buf);
|
||||
}
|
||||
|
||||
W = ego->td2->W;
|
||||
O[0] = W[0] * buf[0];
|
||||
for (i = 1; i < n - i; ++i) {
|
||||
E a, b;
|
||||
INT k;
|
||||
a = buf[i];
|
||||
b = buf[n - i];
|
||||
k = i + i;
|
||||
O[os * (k - 1)] = W[k - 1] * (b - a);
|
||||
O[os * k] = W[k] * (a + b);
|
||||
}
|
||||
if (i == n - i) {
|
||||
O[os * (n - 1)] = -W[n - 1] * buf[i];
|
||||
}
|
||||
}
|
||||
|
||||
X(ifree)(buf);
|
||||
}
|
||||
|
||||
static void awake(plan *ego_, enum wakefulness wakefulness)
|
||||
{
|
||||
P *ego = (P *) ego_;
|
||||
static const tw_instr reodft010e_tw[] = {
|
||||
{ TW_COS, 0, 1 },
|
||||
{ TW_SIN, 0, 1 },
|
||||
{ TW_NEXT, 1, 0 }
|
||||
};
|
||||
static const tw_instr reodft11e_tw[] = {
|
||||
{ TW_COS, 1, 1 },
|
||||
{ TW_NEXT, 2, 0 }
|
||||
};
|
||||
|
||||
X(plan_awake)(ego->cld, wakefulness);
|
||||
|
||||
X(twiddle_awake)(wakefulness,
|
||||
&ego->td, reodft010e_tw, 4*ego->n, 1, ego->n/2+1);
|
||||
X(twiddle_awake)(wakefulness,
|
||||
&ego->td2, reodft11e_tw, 8*ego->n, 1, ego->n * 2);
|
||||
}
|
||||
|
||||
static void destroy(plan *ego_)
|
||||
{
|
||||
P *ego = (P *) ego_;
|
||||
X(plan_destroy_internal)(ego->cld);
|
||||
}
|
||||
|
||||
static void print(const plan *ego_, printer *p)
|
||||
{
|
||||
const P *ego = (const P *) ego_;
|
||||
p->print(p, "(%se-r2hc-%D%v%(%p%))",
|
||||
X(rdft_kind_str)(ego->kind), ego->n, ego->vl, ego->cld);
|
||||
}
|
||||
|
||||
static int applicable0(const solver *ego_, const problem *p_)
|
||||
{
|
||||
const problem_rdft *p = (const problem_rdft *) p_;
|
||||
|
||||
UNUSED(ego_);
|
||||
|
||||
return (1
|
||||
&& p->sz->rnk == 1
|
||||
&& p->vecsz->rnk <= 1
|
||||
&& (p->kind[0] == REDFT11 || p->kind[0] == RODFT11)
|
||||
);
|
||||
}
|
||||
|
||||
static int applicable(const solver *ego, const problem *p, const planner *plnr)
|
||||
{
|
||||
return (!NO_SLOWP(plnr) && applicable0(ego, p));
|
||||
}
|
||||
|
||||
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
|
||||
{
|
||||
P *pln;
|
||||
const problem_rdft *p;
|
||||
plan *cld;
|
||||
R *buf;
|
||||
INT n;
|
||||
opcnt ops;
|
||||
|
||||
static const plan_adt padt = {
|
||||
X(rdft_solve), awake, print, destroy
|
||||
};
|
||||
|
||||
if (!applicable(ego_, p_, plnr))
|
||||
return (plan *)0;
|
||||
|
||||
p = (const problem_rdft *) p_;
|
||||
|
||||
n = p->sz->dims[0].n;
|
||||
buf = (R *) MALLOC(sizeof(R) * n, BUFFERS);
|
||||
|
||||
cld = X(mkplan_d)(plnr, X(mkproblem_rdft_1_d)(X(mktensor_1d)(n, 1, 1),
|
||||
X(mktensor_0d)(),
|
||||
buf, buf, R2HC));
|
||||
X(ifree)(buf);
|
||||
if (!cld)
|
||||
return (plan *)0;
|
||||
|
||||
pln = MKPLAN_RDFT(P, &padt, p->kind[0]==REDFT11 ? apply_re11:apply_ro11);
|
||||
pln->n = n;
|
||||
pln->is = p->sz->dims[0].is;
|
||||
pln->os = p->sz->dims[0].os;
|
||||
pln->cld = cld;
|
||||
pln->td = pln->td2 = 0;
|
||||
pln->kind = p->kind[0];
|
||||
|
||||
X(tensor_tornk1)(p->vecsz, &pln->vl, &pln->ivs, &pln->ovs);
|
||||
|
||||
X(ops_zero)(&ops);
|
||||
ops.other = 5 + (n-1) * 2 + (n-1)/2 * 12 + (1 - n % 2) * 6;
|
||||
ops.add = (n - 1) * 1 + (n-1)/2 * 6;
|
||||
ops.mul = 2 + (n-1) * 1 + (n-1)/2 * 6 + (1 - n % 2) * 3;
|
||||
|
||||
X(ops_zero)(&pln->super.super.ops);
|
||||
X(ops_madd2)(pln->vl, &ops, &pln->super.super.ops);
|
||||
X(ops_madd2)(pln->vl, &cld->ops, &pln->super.super.ops);
|
||||
|
||||
return &(pln->super.super);
|
||||
}
|
||||
|
||||
/* constructor */
|
||||
static solver *mksolver(void)
|
||||
{
|
||||
static const solver_adt sadt = { PROBLEM_RDFT, mkplan, 0 };
|
||||
S *slv = MKSOLVER(S, &sadt);
|
||||
return &(slv->super);
|
||||
}
|
||||
|
||||
void X(reodft11e_r2hc_register)(planner *p)
|
||||
{
|
||||
REGISTER_SOLVER(p, mksolver());
|
||||
}
|
||||
513
fftw-3.3.10/reodft/reodft11e-radix2.c
Normal file
513
fftw-3.3.10/reodft/reodft11e-radix2.c
Normal file
@@ -0,0 +1,513 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
/* Do an R{E,O}DFT11 problem of *even* size by a pair of R2HC problems
|
||||
of half the size, plus some pre/post-processing. Use a trick from:
|
||||
|
||||
Zhongde Wang, "On computing the discrete Fourier and cosine transforms,"
|
||||
IEEE Trans. Acoust. Speech Sig. Proc. ASSP-33 (4), 1341--1344 (1985).
|
||||
|
||||
to re-express as a pair of half-size REDFT01 (DCT-III) problems. Our
|
||||
implementation looks quite a bit different from the algorithm described
|
||||
in the paper because we combined the paper's pre/post-processing with
|
||||
the pre/post-processing used to turn REDFT01 into R2HC. (Also, the
|
||||
paper uses a DCT/DST pair, but we turn the DST into a DCT via the
|
||||
usual reordering/sign-flip trick. We additionally combined a couple
|
||||
of the matrices/transformations of the paper into a single pass.)
|
||||
|
||||
NOTE: We originally used a simpler method by S. C. Chan and K. L. Ho
|
||||
that turned out to have numerical problems; see reodft11e-r2hc.c.
|
||||
|
||||
(For odd sizes, see reodft11e-r2hc-odd.c.)
|
||||
*/
|
||||
|
||||
#include "reodft/reodft.h"
|
||||
|
||||
typedef struct {
|
||||
solver super;
|
||||
} S;
|
||||
|
||||
typedef struct {
|
||||
plan_rdft super;
|
||||
plan *cld;
|
||||
twid *td, *td2;
|
||||
INT is, os;
|
||||
INT n;
|
||||
INT vl;
|
||||
INT ivs, ovs;
|
||||
rdft_kind kind;
|
||||
} P;
|
||||
|
||||
static void apply_re11(const plan *ego_, R *I, R *O)
|
||||
{
|
||||
const P *ego = (const P *) ego_;
|
||||
INT is = ego->is, os = ego->os;
|
||||
INT i, n = ego->n, n2 = n/2;
|
||||
INT iv, vl = ego->vl;
|
||||
INT ivs = ego->ivs, ovs = ego->ovs;
|
||||
R *W = ego->td->W;
|
||||
R *W2;
|
||||
R *buf;
|
||||
|
||||
buf = (R *) MALLOC(sizeof(R) * n, BUFFERS);
|
||||
|
||||
for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) {
|
||||
buf[0] = K(2.0) * I[0];
|
||||
buf[n2] = K(2.0) * I[is * (n - 1)];
|
||||
for (i = 1; i + i < n2; ++i) {
|
||||
INT k = i + i;
|
||||
E a, b, a2, b2;
|
||||
{
|
||||
E u, v;
|
||||
u = I[is * (k - 1)];
|
||||
v = I[is * k];
|
||||
a = u + v;
|
||||
b2 = u - v;
|
||||
}
|
||||
{
|
||||
E u, v;
|
||||
u = I[is * (n - k - 1)];
|
||||
v = I[is * (n - k)];
|
||||
b = u + v;
|
||||
a2 = u - v;
|
||||
}
|
||||
{
|
||||
E wa, wb;
|
||||
wa = W[2*i];
|
||||
wb = W[2*i + 1];
|
||||
{
|
||||
E apb, amb;
|
||||
apb = a + b;
|
||||
amb = a - b;
|
||||
buf[i] = wa * amb + wb * apb;
|
||||
buf[n2 - i] = wa * apb - wb * amb;
|
||||
}
|
||||
{
|
||||
E apb, amb;
|
||||
apb = a2 + b2;
|
||||
amb = a2 - b2;
|
||||
buf[n2 + i] = wa * amb + wb * apb;
|
||||
buf[n - i] = wa * apb - wb * amb;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (i + i == n2) {
|
||||
E u, v;
|
||||
u = I[is * (n2 - 1)];
|
||||
v = I[is * n2];
|
||||
buf[i] = (u + v) * (W[2*i] * K(2.0));
|
||||
buf[n - i] = (u - v) * (W[2*i] * K(2.0));
|
||||
}
|
||||
|
||||
|
||||
/* child plan: two r2hc's of size n/2 */
|
||||
{
|
||||
plan_rdft *cld = (plan_rdft *) ego->cld;
|
||||
cld->apply((plan *) cld, buf, buf);
|
||||
}
|
||||
|
||||
W2 = ego->td2->W;
|
||||
{ /* i == 0 case */
|
||||
E wa, wb;
|
||||
E a, b;
|
||||
wa = W2[0]; /* cos */
|
||||
wb = W2[1]; /* sin */
|
||||
a = buf[0];
|
||||
b = buf[n2];
|
||||
O[0] = wa * a + wb * b;
|
||||
O[os * (n - 1)] = wb * a - wa * b;
|
||||
}
|
||||
W2 += 2;
|
||||
for (i = 1; i + i < n2; ++i, W2 += 2) {
|
||||
INT k;
|
||||
E u, v, u2, v2;
|
||||
u = buf[i];
|
||||
v = buf[n2 - i];
|
||||
u2 = buf[n2 + i];
|
||||
v2 = buf[n - i];
|
||||
k = (i + i) - 1;
|
||||
{
|
||||
E wa, wb;
|
||||
E a, b;
|
||||
wa = W2[0]; /* cos */
|
||||
wb = W2[1]; /* sin */
|
||||
a = u - v;
|
||||
b = v2 - u2;
|
||||
O[os * k] = wa * a + wb * b;
|
||||
O[os * (n - 1 - k)] = wb * a - wa * b;
|
||||
}
|
||||
++k;
|
||||
W2 += 2;
|
||||
{
|
||||
E wa, wb;
|
||||
E a, b;
|
||||
wa = W2[0]; /* cos */
|
||||
wb = W2[1]; /* sin */
|
||||
a = u + v;
|
||||
b = u2 + v2;
|
||||
O[os * k] = wa * a + wb * b;
|
||||
O[os * (n - 1 - k)] = wb * a - wa * b;
|
||||
}
|
||||
}
|
||||
if (i + i == n2) {
|
||||
INT k = (i + i) - 1;
|
||||
E wa, wb;
|
||||
E a, b;
|
||||
wa = W2[0]; /* cos */
|
||||
wb = W2[1]; /* sin */
|
||||
a = buf[i];
|
||||
b = buf[n2 + i];
|
||||
O[os * k] = wa * a - wb * b;
|
||||
O[os * (n - 1 - k)] = wb * a + wa * b;
|
||||
}
|
||||
}
|
||||
|
||||
X(ifree)(buf);
|
||||
}
|
||||
|
||||
#if 0
|
||||
|
||||
/* This version of apply_re11 uses REDFT01 child plans, more similar
|
||||
to the original paper by Z. Wang. We keep it around for reference
|
||||
(it is simpler) and because it may become more efficient if we
|
||||
ever implement REDFT01 codelets. */
|
||||
|
||||
static void apply_re11(const plan *ego_, R *I, R *O)
|
||||
{
|
||||
const P *ego = (const P *) ego_;
|
||||
INT is = ego->is, os = ego->os;
|
||||
INT i, n = ego->n;
|
||||
INT iv, vl = ego->vl;
|
||||
INT ivs = ego->ivs, ovs = ego->ovs;
|
||||
R *W;
|
||||
R *buf;
|
||||
|
||||
buf = (R *) MALLOC(sizeof(R) * n, BUFFERS);
|
||||
|
||||
for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) {
|
||||
buf[0] = K(2.0) * I[0];
|
||||
buf[n/2] = K(2.0) * I[is * (n - 1)];
|
||||
for (i = 1; i + i < n; ++i) {
|
||||
INT k = i + i;
|
||||
E a, b;
|
||||
a = I[is * (k - 1)];
|
||||
b = I[is * k];
|
||||
buf[i] = a + b;
|
||||
buf[n - i] = a - b;
|
||||
}
|
||||
|
||||
/* child plan: two redft01's (DCT-III) */
|
||||
{
|
||||
plan_rdft *cld = (plan_rdft *) ego->cld;
|
||||
cld->apply((plan *) cld, buf, buf);
|
||||
}
|
||||
|
||||
W = ego->td2->W;
|
||||
for (i = 0; i + 1 < n/2; ++i, W += 2) {
|
||||
{
|
||||
E wa, wb;
|
||||
E a, b;
|
||||
wa = W[0]; /* cos */
|
||||
wb = W[1]; /* sin */
|
||||
a = buf[i];
|
||||
b = buf[n/2 + i];
|
||||
O[os * i] = wa * a + wb * b;
|
||||
O[os * (n - 1 - i)] = wb * a - wa * b;
|
||||
}
|
||||
++i;
|
||||
W += 2;
|
||||
{
|
||||
E wa, wb;
|
||||
E a, b;
|
||||
wa = W[0]; /* cos */
|
||||
wb = W[1]; /* sin */
|
||||
a = buf[i];
|
||||
b = buf[n/2 + i];
|
||||
O[os * i] = wa * a - wb * b;
|
||||
O[os * (n - 1 - i)] = wb * a + wa * b;
|
||||
}
|
||||
}
|
||||
if (i < n/2) {
|
||||
E wa, wb;
|
||||
E a, b;
|
||||
wa = W[0]; /* cos */
|
||||
wb = W[1]; /* sin */
|
||||
a = buf[i];
|
||||
b = buf[n/2 + i];
|
||||
O[os * i] = wa * a + wb * b;
|
||||
O[os * (n - 1 - i)] = wb * a - wa * b;
|
||||
}
|
||||
}
|
||||
|
||||
X(ifree)(buf);
|
||||
}
|
||||
|
||||
#endif /* 0 */
|
||||
|
||||
/* like for rodft01, rodft11 is obtained from redft11 by
|
||||
reversing the input and flipping the sign of every other output. */
|
||||
static void apply_ro11(const plan *ego_, R *I, R *O)
|
||||
{
|
||||
const P *ego = (const P *) ego_;
|
||||
INT is = ego->is, os = ego->os;
|
||||
INT i, n = ego->n, n2 = n/2;
|
||||
INT iv, vl = ego->vl;
|
||||
INT ivs = ego->ivs, ovs = ego->ovs;
|
||||
R *W = ego->td->W;
|
||||
R *W2;
|
||||
R *buf;
|
||||
|
||||
buf = (R *) MALLOC(sizeof(R) * n, BUFFERS);
|
||||
|
||||
for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) {
|
||||
buf[0] = K(2.0) * I[is * (n - 1)];
|
||||
buf[n2] = K(2.0) * I[0];
|
||||
for (i = 1; i + i < n2; ++i) {
|
||||
INT k = i + i;
|
||||
E a, b, a2, b2;
|
||||
{
|
||||
E u, v;
|
||||
u = I[is * (n - k)];
|
||||
v = I[is * (n - 1 - k)];
|
||||
a = u + v;
|
||||
b2 = u - v;
|
||||
}
|
||||
{
|
||||
E u, v;
|
||||
u = I[is * (k)];
|
||||
v = I[is * (k - 1)];
|
||||
b = u + v;
|
||||
a2 = u - v;
|
||||
}
|
||||
{
|
||||
E wa, wb;
|
||||
wa = W[2*i];
|
||||
wb = W[2*i + 1];
|
||||
{
|
||||
E apb, amb;
|
||||
apb = a + b;
|
||||
amb = a - b;
|
||||
buf[i] = wa * amb + wb * apb;
|
||||
buf[n2 - i] = wa * apb - wb * amb;
|
||||
}
|
||||
{
|
||||
E apb, amb;
|
||||
apb = a2 + b2;
|
||||
amb = a2 - b2;
|
||||
buf[n2 + i] = wa * amb + wb * apb;
|
||||
buf[n - i] = wa * apb - wb * amb;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (i + i == n2) {
|
||||
E u, v;
|
||||
u = I[is * n2];
|
||||
v = I[is * (n2 - 1)];
|
||||
buf[i] = (u + v) * (W[2*i] * K(2.0));
|
||||
buf[n - i] = (u - v) * (W[2*i] * K(2.0));
|
||||
}
|
||||
|
||||
|
||||
/* child plan: two r2hc's of size n/2 */
|
||||
{
|
||||
plan_rdft *cld = (plan_rdft *) ego->cld;
|
||||
cld->apply((plan *) cld, buf, buf);
|
||||
}
|
||||
|
||||
W2 = ego->td2->W;
|
||||
{ /* i == 0 case */
|
||||
E wa, wb;
|
||||
E a, b;
|
||||
wa = W2[0]; /* cos */
|
||||
wb = W2[1]; /* sin */
|
||||
a = buf[0];
|
||||
b = buf[n2];
|
||||
O[0] = wa * a + wb * b;
|
||||
O[os * (n - 1)] = wa * b - wb * a;
|
||||
}
|
||||
W2 += 2;
|
||||
for (i = 1; i + i < n2; ++i, W2 += 2) {
|
||||
INT k;
|
||||
E u, v, u2, v2;
|
||||
u = buf[i];
|
||||
v = buf[n2 - i];
|
||||
u2 = buf[n2 + i];
|
||||
v2 = buf[n - i];
|
||||
k = (i + i) - 1;
|
||||
{
|
||||
E wa, wb;
|
||||
E a, b;
|
||||
wa = W2[0]; /* cos */
|
||||
wb = W2[1]; /* sin */
|
||||
a = v - u;
|
||||
b = u2 - v2;
|
||||
O[os * k] = wa * a + wb * b;
|
||||
O[os * (n - 1 - k)] = wa * b - wb * a;
|
||||
}
|
||||
++k;
|
||||
W2 += 2;
|
||||
{
|
||||
E wa, wb;
|
||||
E a, b;
|
||||
wa = W2[0]; /* cos */
|
||||
wb = W2[1]; /* sin */
|
||||
a = u + v;
|
||||
b = u2 + v2;
|
||||
O[os * k] = wa * a + wb * b;
|
||||
O[os * (n - 1 - k)] = wa * b - wb * a;
|
||||
}
|
||||
}
|
||||
if (i + i == n2) {
|
||||
INT k = (i + i) - 1;
|
||||
E wa, wb;
|
||||
E a, b;
|
||||
wa = W2[0]; /* cos */
|
||||
wb = W2[1]; /* sin */
|
||||
a = buf[i];
|
||||
b = buf[n2 + i];
|
||||
O[os * k] = wb * b - wa * a;
|
||||
O[os * (n - 1 - k)] = wa * b + wb * a;
|
||||
}
|
||||
}
|
||||
|
||||
X(ifree)(buf);
|
||||
}
|
||||
|
||||
static void awake(plan *ego_, enum wakefulness wakefulness)
|
||||
{
|
||||
P *ego = (P *) ego_;
|
||||
static const tw_instr reodft010e_tw[] = {
|
||||
{ TW_COS, 0, 1 },
|
||||
{ TW_SIN, 0, 1 },
|
||||
{ TW_NEXT, 1, 0 }
|
||||
};
|
||||
static const tw_instr reodft11e_tw[] = {
|
||||
{ TW_COS, 1, 1 },
|
||||
{ TW_SIN, 1, 1 },
|
||||
{ TW_NEXT, 2, 0 }
|
||||
};
|
||||
|
||||
X(plan_awake)(ego->cld, wakefulness);
|
||||
|
||||
X(twiddle_awake)(wakefulness, &ego->td, reodft010e_tw,
|
||||
2*ego->n, 1, ego->n/4+1);
|
||||
X(twiddle_awake)(wakefulness, &ego->td2, reodft11e_tw,
|
||||
8*ego->n, 1, ego->n);
|
||||
}
|
||||
|
||||
static void destroy(plan *ego_)
|
||||
{
|
||||
P *ego = (P *) ego_;
|
||||
X(plan_destroy_internal)(ego->cld);
|
||||
}
|
||||
|
||||
static void print(const plan *ego_, printer *p)
|
||||
{
|
||||
const P *ego = (const P *) ego_;
|
||||
p->print(p, "(%se-radix2-r2hc-%D%v%(%p%))",
|
||||
X(rdft_kind_str)(ego->kind), ego->n, ego->vl, ego->cld);
|
||||
}
|
||||
|
||||
static int applicable0(const solver *ego_, const problem *p_)
|
||||
{
|
||||
const problem_rdft *p = (const problem_rdft *) p_;
|
||||
UNUSED(ego_);
|
||||
|
||||
return (1
|
||||
&& p->sz->rnk == 1
|
||||
&& p->vecsz->rnk <= 1
|
||||
&& p->sz->dims[0].n % 2 == 0
|
||||
&& (p->kind[0] == REDFT11 || p->kind[0] == RODFT11)
|
||||
);
|
||||
}
|
||||
|
||||
static int applicable(const solver *ego, const problem *p, const planner *plnr)
|
||||
{
|
||||
return (!NO_SLOWP(plnr) && applicable0(ego, p));
|
||||
}
|
||||
|
||||
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
|
||||
{
|
||||
P *pln;
|
||||
const problem_rdft *p;
|
||||
plan *cld;
|
||||
R *buf;
|
||||
INT n;
|
||||
opcnt ops;
|
||||
|
||||
static const plan_adt padt = {
|
||||
X(rdft_solve), awake, print, destroy
|
||||
};
|
||||
|
||||
if (!applicable(ego_, p_, plnr))
|
||||
return (plan *)0;
|
||||
|
||||
p = (const problem_rdft *) p_;
|
||||
|
||||
n = p->sz->dims[0].n;
|
||||
buf = (R *) MALLOC(sizeof(R) * n, BUFFERS);
|
||||
|
||||
cld = X(mkplan_d)(plnr, X(mkproblem_rdft_1_d)(X(mktensor_1d)(n/2, 1, 1),
|
||||
X(mktensor_1d)(2, n/2, n/2),
|
||||
buf, buf, R2HC));
|
||||
X(ifree)(buf);
|
||||
if (!cld)
|
||||
return (plan *)0;
|
||||
|
||||
pln = MKPLAN_RDFT(P, &padt, p->kind[0]==REDFT11 ? apply_re11:apply_ro11);
|
||||
pln->n = n;
|
||||
pln->is = p->sz->dims[0].is;
|
||||
pln->os = p->sz->dims[0].os;
|
||||
pln->cld = cld;
|
||||
pln->td = pln->td2 = 0;
|
||||
pln->kind = p->kind[0];
|
||||
|
||||
X(tensor_tornk1)(p->vecsz, &pln->vl, &pln->ivs, &pln->ovs);
|
||||
|
||||
X(ops_zero)(&ops);
|
||||
ops.add = 2 + (n/2 - 1)/2 * 20;
|
||||
ops.mul = 6 + (n/2 - 1)/2 * 16;
|
||||
ops.other = 4*n + 2 + (n/2 - 1)/2 * 6;
|
||||
if ((n/2) % 2 == 0) {
|
||||
ops.add += 4;
|
||||
ops.mul += 8;
|
||||
ops.other += 4;
|
||||
}
|
||||
|
||||
X(ops_zero)(&pln->super.super.ops);
|
||||
X(ops_madd2)(pln->vl, &ops, &pln->super.super.ops);
|
||||
X(ops_madd2)(pln->vl, &cld->ops, &pln->super.super.ops);
|
||||
|
||||
return &(pln->super.super);
|
||||
}
|
||||
|
||||
/* constructor */
|
||||
static solver *mksolver(void)
|
||||
{
|
||||
static const solver_adt sadt = { PROBLEM_RDFT, mkplan, 0 };
|
||||
S *slv = MKSOLVER(S, &sadt);
|
||||
return &(slv->super);
|
||||
}
|
||||
|
||||
void X(reodft11e_radix2_r2hc_register)(planner *p)
|
||||
{
|
||||
REGISTER_SOLVER(p, mksolver());
|
||||
}
|
||||
195
fftw-3.3.10/reodft/rodft00e-r2hc-pad.c
Normal file
195
fftw-3.3.10/reodft/rodft00e-r2hc-pad.c
Normal file
@@ -0,0 +1,195 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
/* Do a RODFT00 problem via an R2HC problem, padded antisymmetrically to
|
||||
twice the size. This is asymptotically a factor of ~2 worse than
|
||||
rodft00e-r2hc.c (the algorithm used in e.g. FFTPACK and Numerical
|
||||
Recipes), but we abandoned the latter after we discovered that it
|
||||
has intrinsic accuracy problems. */
|
||||
|
||||
#include "reodft/reodft.h"
|
||||
|
||||
typedef struct {
|
||||
solver super;
|
||||
} S;
|
||||
|
||||
typedef struct {
|
||||
plan_rdft super;
|
||||
plan *cld, *cldcpy;
|
||||
INT is;
|
||||
INT n;
|
||||
INT vl;
|
||||
INT ivs, ovs;
|
||||
} P;
|
||||
|
||||
static void apply(const plan *ego_, R *I, R *O)
|
||||
{
|
||||
const P *ego = (const P *) ego_;
|
||||
INT is = ego->is;
|
||||
INT i, n = ego->n;
|
||||
INT iv, vl = ego->vl;
|
||||
INT ivs = ego->ivs, ovs = ego->ovs;
|
||||
R *buf;
|
||||
|
||||
buf = (R *) MALLOC(sizeof(R) * (2*n), BUFFERS);
|
||||
|
||||
for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) {
|
||||
buf[0] = K(0.0);
|
||||
for (i = 1; i < n; ++i) {
|
||||
R a = I[(i-1) * is];
|
||||
buf[i] = -a;
|
||||
buf[2*n - i] = a;
|
||||
}
|
||||
buf[i] = K(0.0); /* i == n, Nyquist */
|
||||
|
||||
/* r2hc transform of size 2*n */
|
||||
{
|
||||
plan_rdft *cld = (plan_rdft *) ego->cld;
|
||||
cld->apply((plan *) cld, buf, buf);
|
||||
}
|
||||
|
||||
/* copy n-1 real numbers (imag. parts of hc array) from buf to O */
|
||||
{
|
||||
plan_rdft *cldcpy = (plan_rdft *) ego->cldcpy;
|
||||
cldcpy->apply((plan *) cldcpy, buf+2*n-1, O);
|
||||
}
|
||||
}
|
||||
|
||||
X(ifree)(buf);
|
||||
}
|
||||
|
||||
static void awake(plan *ego_, enum wakefulness wakefulness)
|
||||
{
|
||||
P *ego = (P *) ego_;
|
||||
X(plan_awake)(ego->cld, wakefulness);
|
||||
X(plan_awake)(ego->cldcpy, wakefulness);
|
||||
}
|
||||
|
||||
static void destroy(plan *ego_)
|
||||
{
|
||||
P *ego = (P *) ego_;
|
||||
X(plan_destroy_internal)(ego->cldcpy);
|
||||
X(plan_destroy_internal)(ego->cld);
|
||||
}
|
||||
|
||||
static void print(const plan *ego_, printer *p)
|
||||
{
|
||||
const P *ego = (const P *) ego_;
|
||||
p->print(p, "(rodft00e-r2hc-pad-%D%v%(%p%)%(%p%))",
|
||||
ego->n - 1, ego->vl, ego->cld, ego->cldcpy);
|
||||
}
|
||||
|
||||
static int applicable0(const solver *ego_, const problem *p_)
|
||||
{
|
||||
const problem_rdft *p = (const problem_rdft *) p_;
|
||||
UNUSED(ego_);
|
||||
return (1
|
||||
&& p->sz->rnk == 1
|
||||
&& p->vecsz->rnk <= 1
|
||||
&& p->kind[0] == RODFT00
|
||||
);
|
||||
}
|
||||
|
||||
static int applicable(const solver *ego, const problem *p, const planner *plnr)
|
||||
{
|
||||
return (!NO_SLOWP(plnr) && applicable0(ego, p));
|
||||
}
|
||||
|
||||
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
|
||||
{
|
||||
P *pln;
|
||||
const problem_rdft *p;
|
||||
plan *cld = (plan *) 0, *cldcpy;
|
||||
R *buf = (R *) 0;
|
||||
INT n;
|
||||
INT vl, ivs, ovs;
|
||||
opcnt ops;
|
||||
|
||||
static const plan_adt padt = {
|
||||
X(rdft_solve), awake, print, destroy
|
||||
};
|
||||
|
||||
if (!applicable(ego_, p_, plnr))
|
||||
goto nada;
|
||||
|
||||
p = (const problem_rdft *) p_;
|
||||
|
||||
n = p->sz->dims[0].n + 1;
|
||||
A(n > 0);
|
||||
buf = (R *) MALLOC(sizeof(R) * (2*n), BUFFERS);
|
||||
|
||||
cld = X(mkplan_d)(plnr,X(mkproblem_rdft_1_d)(X(mktensor_1d)(2*n,1,1),
|
||||
X(mktensor_0d)(),
|
||||
buf, buf, R2HC));
|
||||
if (!cld)
|
||||
goto nada;
|
||||
|
||||
X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs);
|
||||
cldcpy =
|
||||
X(mkplan_d)(plnr,
|
||||
X(mkproblem_rdft_1_d)(X(mktensor_0d)(),
|
||||
X(mktensor_1d)(n-1,-1,
|
||||
p->sz->dims[0].os),
|
||||
buf+2*n-1,TAINT(p->O, ovs), R2HC));
|
||||
if (!cldcpy)
|
||||
goto nada;
|
||||
|
||||
X(ifree)(buf);
|
||||
|
||||
pln = MKPLAN_RDFT(P, &padt, apply);
|
||||
|
||||
pln->n = n;
|
||||
pln->is = p->sz->dims[0].is;
|
||||
pln->cld = cld;
|
||||
pln->cldcpy = cldcpy;
|
||||
pln->vl = vl;
|
||||
pln->ivs = ivs;
|
||||
pln->ovs = ovs;
|
||||
|
||||
X(ops_zero)(&ops);
|
||||
ops.other = n-1 + 2*n; /* loads + stores (input -> buf) */
|
||||
|
||||
X(ops_zero)(&pln->super.super.ops);
|
||||
X(ops_madd2)(pln->vl, &ops, &pln->super.super.ops);
|
||||
X(ops_madd2)(pln->vl, &cld->ops, &pln->super.super.ops);
|
||||
X(ops_madd2)(pln->vl, &cldcpy->ops, &pln->super.super.ops);
|
||||
|
||||
return &(pln->super.super);
|
||||
|
||||
nada:
|
||||
X(ifree0)(buf);
|
||||
if (cld)
|
||||
X(plan_destroy_internal)(cld);
|
||||
return (plan *)0;
|
||||
}
|
||||
|
||||
/* constructor */
|
||||
static solver *mksolver(void)
|
||||
{
|
||||
static const solver_adt sadt = { PROBLEM_RDFT, mkplan, 0 };
|
||||
S *slv = MKSOLVER(S, &sadt);
|
||||
return &(slv->super);
|
||||
}
|
||||
|
||||
void X(rodft00e_r2hc_pad_register)(planner *p)
|
||||
{
|
||||
REGISTER_SOLVER(p, mksolver());
|
||||
}
|
||||
211
fftw-3.3.10/reodft/rodft00e-r2hc.c
Normal file
211
fftw-3.3.10/reodft/rodft00e-r2hc.c
Normal file
@@ -0,0 +1,211 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
/* Do a RODFT00 problem via an R2HC problem, with some pre/post-processing.
|
||||
|
||||
This code uses the trick from FFTPACK, also documented in a similar
|
||||
form by Numerical Recipes. Unfortunately, this algorithm seems to
|
||||
have intrinsic numerical problems (similar to those in
|
||||
reodft11e-r2hc.c), possibly due to the fact that it multiplies its
|
||||
input by a sine, causing a loss of precision near the zero. For
|
||||
transforms of 16k points, it has already lost three or four decimal
|
||||
places of accuracy, which we deem unacceptable.
|
||||
|
||||
So, we have abandoned this algorithm in favor of the one in
|
||||
rodft00-r2hc-pad.c, which unfortunately sacrifices 30-50% in speed.
|
||||
The only other alternative in the literature that does not have
|
||||
similar numerical difficulties seems to be the direct adaptation of
|
||||
the Cooley-Tukey decomposition for antisymmetric data, but this
|
||||
would require a whole new set of codelets and it's not clear that
|
||||
it's worth it at this point. However, we did implement the latter
|
||||
algorithm for the specific case of odd n (logically adapting the
|
||||
split-radix algorithm); see reodft00e-splitradix.c. */
|
||||
|
||||
#include "reodft/reodft.h"
|
||||
|
||||
typedef struct {
|
||||
solver super;
|
||||
} S;
|
||||
|
||||
typedef struct {
|
||||
plan_rdft super;
|
||||
plan *cld;
|
||||
twid *td;
|
||||
INT is, os;
|
||||
INT n;
|
||||
INT vl;
|
||||
INT ivs, ovs;
|
||||
} P;
|
||||
|
||||
static void apply(const plan *ego_, R *I, R *O)
|
||||
{
|
||||
const P *ego = (const P *) ego_;
|
||||
INT is = ego->is, os = ego->os;
|
||||
INT i, n = ego->n;
|
||||
INT iv, vl = ego->vl;
|
||||
INT ivs = ego->ivs, ovs = ego->ovs;
|
||||
R *W = ego->td->W;
|
||||
R *buf;
|
||||
|
||||
buf = (R *) MALLOC(sizeof(R) * n, BUFFERS);
|
||||
|
||||
for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) {
|
||||
buf[0] = 0;
|
||||
for (i = 1; i < n - i; ++i) {
|
||||
E a, b, apb, amb;
|
||||
a = I[is * (i - 1)];
|
||||
b = I[is * ((n - i) - 1)];
|
||||
apb = K(2.0) * W[i] * (a + b);
|
||||
amb = (a - b);
|
||||
buf[i] = apb + amb;
|
||||
buf[n - i] = apb - amb;
|
||||
}
|
||||
if (i == n - i) {
|
||||
buf[i] = K(4.0) * I[is * (i - 1)];
|
||||
}
|
||||
|
||||
{
|
||||
plan_rdft *cld = (plan_rdft *) ego->cld;
|
||||
cld->apply((plan *) cld, buf, buf);
|
||||
}
|
||||
|
||||
/* FIXME: use recursive/cascade summation for better stability? */
|
||||
O[0] = buf[0] * 0.5;
|
||||
for (i = 1; i + i < n - 1; ++i) {
|
||||
INT k = i + i;
|
||||
O[os * (k - 1)] = -buf[n - i];
|
||||
O[os * k] = O[os * (k - 2)] + buf[i];
|
||||
}
|
||||
if (i + i == n - 1) {
|
||||
O[os * (n - 2)] = -buf[n - i];
|
||||
}
|
||||
}
|
||||
|
||||
X(ifree)(buf);
|
||||
}
|
||||
|
||||
static void awake(plan *ego_, enum wakefulness wakefulness)
|
||||
{
|
||||
P *ego = (P *) ego_;
|
||||
static const tw_instr rodft00e_tw[] = {
|
||||
{ TW_SIN, 0, 1 },
|
||||
{ TW_NEXT, 1, 0 }
|
||||
};
|
||||
|
||||
X(plan_awake)(ego->cld, wakefulness);
|
||||
|
||||
X(twiddle_awake)(wakefulness,
|
||||
&ego->td, rodft00e_tw, 2*ego->n, 1, (ego->n+1)/2);
|
||||
}
|
||||
|
||||
static void destroy(plan *ego_)
|
||||
{
|
||||
P *ego = (P *) ego_;
|
||||
X(plan_destroy_internal)(ego->cld);
|
||||
}
|
||||
|
||||
static void print(const plan *ego_, printer *p)
|
||||
{
|
||||
const P *ego = (const P *) ego_;
|
||||
p->print(p, "(rodft00e-r2hc-%D%v%(%p%))", ego->n - 1, ego->vl, ego->cld);
|
||||
}
|
||||
|
||||
static int applicable0(const solver *ego_, const problem *p_)
|
||||
{
|
||||
const problem_rdft *p = (const problem_rdft *) p_;
|
||||
UNUSED(ego_);
|
||||
|
||||
return (1
|
||||
&& p->sz->rnk == 1
|
||||
&& p->vecsz->rnk <= 1
|
||||
&& p->kind[0] == RODFT00
|
||||
);
|
||||
}
|
||||
|
||||
static int applicable(const solver *ego, const problem *p, const planner *plnr)
|
||||
{
|
||||
return (!NO_SLOWP(plnr) && applicable0(ego, p));
|
||||
}
|
||||
|
||||
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
|
||||
{
|
||||
P *pln;
|
||||
const problem_rdft *p;
|
||||
plan *cld;
|
||||
R *buf;
|
||||
INT n;
|
||||
opcnt ops;
|
||||
|
||||
static const plan_adt padt = {
|
||||
X(rdft_solve), awake, print, destroy
|
||||
};
|
||||
|
||||
if (!applicable(ego_, p_, plnr))
|
||||
return (plan *)0;
|
||||
|
||||
p = (const problem_rdft *) p_;
|
||||
|
||||
n = p->sz->dims[0].n + 1;
|
||||
buf = (R *) MALLOC(sizeof(R) * n, BUFFERS);
|
||||
|
||||
cld = X(mkplan_d)(plnr, X(mkproblem_rdft_1_d)(X(mktensor_1d)(n, 1, 1),
|
||||
X(mktensor_0d)(),
|
||||
buf, buf, R2HC));
|
||||
X(ifree)(buf);
|
||||
if (!cld)
|
||||
return (plan *)0;
|
||||
|
||||
pln = MKPLAN_RDFT(P, &padt, apply);
|
||||
|
||||
pln->n = n;
|
||||
pln->is = p->sz->dims[0].is;
|
||||
pln->os = p->sz->dims[0].os;
|
||||
pln->cld = cld;
|
||||
pln->td = 0;
|
||||
|
||||
X(tensor_tornk1)(p->vecsz, &pln->vl, &pln->ivs, &pln->ovs);
|
||||
|
||||
X(ops_zero)(&ops);
|
||||
ops.other = 4 + (n-1)/2 * 5 + (n-2)/2 * 5;
|
||||
ops.add = (n-1)/2 * 4 + (n-2)/2 * 1;
|
||||
ops.mul = 1 + (n-1)/2 * 2;
|
||||
if (n % 2 == 0)
|
||||
ops.mul += 1;
|
||||
|
||||
X(ops_zero)(&pln->super.super.ops);
|
||||
X(ops_madd2)(pln->vl, &ops, &pln->super.super.ops);
|
||||
X(ops_madd2)(pln->vl, &cld->ops, &pln->super.super.ops);
|
||||
|
||||
return &(pln->super.super);
|
||||
}
|
||||
|
||||
/* constructor */
|
||||
static solver *mksolver(void)
|
||||
{
|
||||
static const solver_adt sadt = { PROBLEM_RDFT, mkplan, 0 };
|
||||
S *slv = MKSOLVER(S, &sadt);
|
||||
return &(slv->super);
|
||||
}
|
||||
|
||||
void X(rodft00e_r2hc_register)(planner *p)
|
||||
{
|
||||
REGISTER_SOLVER(p, mksolver());
|
||||
}
|
||||
Reference in New Issue
Block a user