This commit is contained in:
2025-07-12 12:17:44 +03:00
parent c759f60ff7
commit 792e1b937a
3507 changed files with 492613 additions and 0 deletions

View File

@@ -0,0 +1,12 @@
AM_CPPFLAGS = -I $(top_srcdir)
SUBDIRS =
noinst_LTLIBRARIES = libreodft.la
# no longer used due to numerical problems
EXTRA_DIST = reodft11e-r2hc.c redft00e-r2hc.c rodft00e-r2hc.c
libreodft_la_SOURCES = conf.c reodft.h reodft010e-r2hc.c \
reodft11e-radix2.c reodft11e-r2hc-odd.c redft00e-r2hc-pad.c \
rodft00e-r2hc-pad.c reodft00e-splitradix.c
# redft00e-r2hc.c rodft00e-r2hc.c reodft11e-r2hc.c

View File

@@ -0,0 +1,787 @@
# Makefile.in generated by automake 1.16.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994-2020 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE.
@SET_MAKE@
VPATH = @srcdir@
am__is_gnu_make = { \
if test -z '$(MAKELEVEL)'; then \
false; \
elif test -n '$(MAKE_HOST)'; then \
true; \
elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
true; \
else \
false; \
fi; \
}
am__make_running_with_option = \
case $${target_option-} in \
?) ;; \
*) echo "am__make_running_with_option: internal error: invalid" \
"target option '$${target_option-}' specified" >&2; \
exit 1;; \
esac; \
has_opt=no; \
sane_makeflags=$$MAKEFLAGS; \
if $(am__is_gnu_make); then \
sane_makeflags=$$MFLAGS; \
else \
case $$MAKEFLAGS in \
*\\[\ \ ]*) \
bs=\\; \
sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
| sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \
esac; \
fi; \
skip_next=no; \
strip_trailopt () \
{ \
flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
}; \
for flg in $$sane_makeflags; do \
test $$skip_next = yes && { skip_next=no; continue; }; \
case $$flg in \
*=*|--*) continue;; \
-*I) strip_trailopt 'I'; skip_next=yes;; \
-*I?*) strip_trailopt 'I';; \
-*O) strip_trailopt 'O'; skip_next=yes;; \
-*O?*) strip_trailopt 'O';; \
-*l) strip_trailopt 'l'; skip_next=yes;; \
-*l?*) strip_trailopt 'l';; \
-[dEDm]) skip_next=yes;; \
-[JT]) skip_next=yes;; \
esac; \
case $$flg in \
*$$target_option*) has_opt=yes; break;; \
esac; \
done; \
test $$has_opt = yes
am__make_dryrun = (target_option=n; $(am__make_running_with_option))
am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
pkgdatadir = $(datadir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
pkglibexecdir = $(libexecdir)/@PACKAGE@
am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
install_sh_DATA = $(install_sh) -c -m 644
install_sh_PROGRAM = $(install_sh) -c
install_sh_SCRIPT = $(install_sh) -c
INSTALL_HEADER = $(INSTALL_DATA)
transform = $(program_transform_name)
NORMAL_INSTALL = :
PRE_INSTALL = :
POST_INSTALL = :
NORMAL_UNINSTALL = :
PRE_UNINSTALL = :
POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
subdir = reodft
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/m4/acx_mpi.m4 \
$(top_srcdir)/m4/acx_pthread.m4 \
$(top_srcdir)/m4/ax_cc_maxopt.m4 \
$(top_srcdir)/m4/ax_check_compiler_flags.m4 \
$(top_srcdir)/m4/ax_compiler_vendor.m4 \
$(top_srcdir)/m4/ax_gcc_aligns_stack.m4 \
$(top_srcdir)/m4/ax_gcc_version.m4 \
$(top_srcdir)/m4/ax_openmp.m4 $(top_srcdir)/m4/libtool.m4 \
$(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \
$(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
$(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON)
mkinstalldirs = $(install_sh) -d
CONFIG_HEADER = $(top_builddir)/config.h
CONFIG_CLEAN_FILES =
CONFIG_CLEAN_VPATH_FILES =
LTLIBRARIES = $(noinst_LTLIBRARIES)
libreodft_la_LIBADD =
am_libreodft_la_OBJECTS = conf.lo reodft010e-r2hc.lo \
reodft11e-radix2.lo reodft11e-r2hc-odd.lo redft00e-r2hc-pad.lo \
rodft00e-r2hc-pad.lo reodft00e-splitradix.lo
libreodft_la_OBJECTS = $(am_libreodft_la_OBJECTS)
AM_V_lt = $(am__v_lt_@AM_V@)
am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
am__v_lt_0 = --silent
am__v_lt_1 =
AM_V_P = $(am__v_P_@AM_V@)
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
am__v_P_0 = false
am__v_P_1 = :
AM_V_GEN = $(am__v_GEN_@AM_V@)
am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
am__v_GEN_0 = @echo " GEN " $@;
am__v_GEN_1 =
AM_V_at = $(am__v_at_@AM_V@)
am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
am__v_at_0 = @
am__v_at_1 =
DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
depcomp = $(SHELL) $(top_srcdir)/depcomp
am__maybe_remake_depfiles = depfiles
am__depfiles_remade = ./$(DEPDIR)/conf.Plo \
./$(DEPDIR)/redft00e-r2hc-pad.Plo \
./$(DEPDIR)/reodft00e-splitradix.Plo \
./$(DEPDIR)/reodft010e-r2hc.Plo \
./$(DEPDIR)/reodft11e-r2hc-odd.Plo \
./$(DEPDIR)/reodft11e-radix2.Plo \
./$(DEPDIR)/rodft00e-r2hc-pad.Plo
am__mv = mv -f
COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \
$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
$(AM_CFLAGS) $(CFLAGS)
AM_V_CC = $(am__v_CC_@AM_V@)
am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
am__v_CC_0 = @echo " CC " $@;
am__v_CC_1 =
CCLD = $(CC)
LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
$(AM_LDFLAGS) $(LDFLAGS) -o $@
AM_V_CCLD = $(am__v_CCLD_@AM_V@)
am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
am__v_CCLD_0 = @echo " CCLD " $@;
am__v_CCLD_1 =
SOURCES = $(libreodft_la_SOURCES)
DIST_SOURCES = $(libreodft_la_SOURCES)
RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \
ctags-recursive dvi-recursive html-recursive info-recursive \
install-data-recursive install-dvi-recursive \
install-exec-recursive install-html-recursive \
install-info-recursive install-pdf-recursive \
install-ps-recursive install-recursive installcheck-recursive \
installdirs-recursive pdf-recursive ps-recursive \
tags-recursive uninstall-recursive
am__can_run_installinfo = \
case $$AM_UPDATE_INFO_DIR in \
n|no|NO) false;; \
*) (install-info --version) >/dev/null 2>&1;; \
esac
RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \
distclean-recursive maintainer-clean-recursive
am__recursive_targets = \
$(RECURSIVE_TARGETS) \
$(RECURSIVE_CLEAN_TARGETS) \
$(am__extra_recursive_targets)
AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \
distdir distdir-am
am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
# Read a list of newline-separated strings from the standard input,
# and print each of them once, without duplicates. Input order is
# *not* preserved.
am__uniquify_input = $(AWK) '\
BEGIN { nonempty = 0; } \
{ items[$$0] = 1; nonempty = 1; } \
END { if (nonempty) { for (i in items) print i; }; } \
'
# Make sure the list of sources is unique. This is necessary because,
# e.g., the same source file might be shared among _SOURCES variables
# for different programs/libraries.
am__define_uniq_tagged_files = \
list='$(am__tagged_files)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | $(am__uniquify_input)`
ETAGS = etags
CTAGS = ctags
DIST_SUBDIRS = $(SUBDIRS)
am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/depcomp
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
am__relativize = \
dir0=`pwd`; \
sed_first='s,^\([^/]*\)/.*$$,\1,'; \
sed_rest='s,^[^/]*/*,,'; \
sed_last='s,^.*/\([^/]*\)$$,\1,'; \
sed_butlast='s,/*[^/]*$$,,'; \
while test -n "$$dir1"; do \
first=`echo "$$dir1" | sed -e "$$sed_first"`; \
if test "$$first" != "."; then \
if test "$$first" = ".."; then \
dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \
dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \
else \
first2=`echo "$$dir2" | sed -e "$$sed_first"`; \
if test "$$first2" = "$$first"; then \
dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \
else \
dir2="../$$dir2"; \
fi; \
dir0="$$dir0"/"$$first"; \
fi; \
fi; \
dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \
done; \
reldir="$$dir2"
ACLOCAL = @ACLOCAL@
ALLOCA = @ALLOCA@
ALTIVEC_CFLAGS = @ALTIVEC_CFLAGS@
AMTAR = @AMTAR@
AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
AR = @AR@
AS = @AS@
AUTOCONF = @AUTOCONF@
AUTOHEADER = @AUTOHEADER@
AUTOMAKE = @AUTOMAKE@
AVX2_CFLAGS = @AVX2_CFLAGS@
AVX512_CFLAGS = @AVX512_CFLAGS@
AVX_128_FMA_CFLAGS = @AVX_128_FMA_CFLAGS@
AVX_CFLAGS = @AVX_CFLAGS@
AWK = @AWK@
CC = @CC@
CCDEPMODE = @CCDEPMODE@
CFLAGS = @CFLAGS@
CHECK_PL_OPTS = @CHECK_PL_OPTS@
CPP = @CPP@
CPPFLAGS = @CPPFLAGS@
CYGPATH_W = @CYGPATH_W@
C_FFTW_R2R_KIND = @C_FFTW_R2R_KIND@
C_MPI_FINT = @C_MPI_FINT@
DEFS = @DEFS@
DEPDIR = @DEPDIR@
DLLTOOL = @DLLTOOL@
DSYMUTIL = @DSYMUTIL@
DUMPBIN = @DUMPBIN@
ECHO_C = @ECHO_C@
ECHO_N = @ECHO_N@
ECHO_T = @ECHO_T@
EGREP = @EGREP@
EXEEXT = @EXEEXT@
F77 = @F77@
FFLAGS = @FFLAGS@
FGREP = @FGREP@
FLIBS = @FLIBS@
GREP = @GREP@
INDENT = @INDENT@
INSTALL = @INSTALL@
INSTALL_DATA = @INSTALL_DATA@
INSTALL_PROGRAM = @INSTALL_PROGRAM@
INSTALL_SCRIPT = @INSTALL_SCRIPT@
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
KCVI_CFLAGS = @KCVI_CFLAGS@
LD = @LD@
LDFLAGS = @LDFLAGS@
LIBOBJS = @LIBOBJS@
LIBQUADMATH = @LIBQUADMATH@
LIBS = @LIBS@
LIBTOOL = @LIBTOOL@
LIPO = @LIPO@
LN_S = @LN_S@
LTLIBOBJS = @LTLIBOBJS@
LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@
MAINT = @MAINT@
MAKEINFO = @MAKEINFO@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MPICC = @MPICC@
MPILIBS = @MPILIBS@
MPIRUN = @MPIRUN@
NEON_CFLAGS = @NEON_CFLAGS@
NM = @NM@
NMEDIT = @NMEDIT@
OBJDUMP = @OBJDUMP@
OBJEXT = @OBJEXT@
OCAMLBUILD = @OCAMLBUILD@
OPENMP_CFLAGS = @OPENMP_CFLAGS@
OTOOL = @OTOOL@
OTOOL64 = @OTOOL64@
PACKAGE = @PACKAGE@
PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
PACKAGE_NAME = @PACKAGE_NAME@
PACKAGE_STRING = @PACKAGE_STRING@
PACKAGE_TARNAME = @PACKAGE_TARNAME@
PACKAGE_URL = @PACKAGE_URL@
PACKAGE_VERSION = @PACKAGE_VERSION@
PATH_SEPARATOR = @PATH_SEPARATOR@
POW_LIB = @POW_LIB@
PRECISION = @PRECISION@
PREC_SUFFIX = @PREC_SUFFIX@
PTHREAD_CC = @PTHREAD_CC@
PTHREAD_CFLAGS = @PTHREAD_CFLAGS@
PTHREAD_LIBS = @PTHREAD_LIBS@
RANLIB = @RANLIB@
SED = @SED@
SET_MAKE = @SET_MAKE@
SHARED_VERSION_INFO = @SHARED_VERSION_INFO@
SHELL = @SHELL@
SSE2_CFLAGS = @SSE2_CFLAGS@
STACK_ALIGN_CFLAGS = @STACK_ALIGN_CFLAGS@
STRIP = @STRIP@
THREADLIBS = @THREADLIBS@
VERSION = @VERSION@
VSX_CFLAGS = @VSX_CFLAGS@
abs_builddir = @abs_builddir@
abs_srcdir = @abs_srcdir@
abs_top_builddir = @abs_top_builddir@
abs_top_srcdir = @abs_top_srcdir@
ac_ct_AR = @ac_ct_AR@
ac_ct_CC = @ac_ct_CC@
ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
ac_ct_F77 = @ac_ct_F77@
acx_pthread_config = @acx_pthread_config@
am__include = @am__include@
am__leading_dot = @am__leading_dot@
am__quote = @am__quote@
am__tar = @am__tar@
am__untar = @am__untar@
bindir = @bindir@
build = @build@
build_alias = @build_alias@
build_cpu = @build_cpu@
build_os = @build_os@
build_vendor = @build_vendor@
builddir = @builddir@
datadir = @datadir@
datarootdir = @datarootdir@
docdir = @docdir@
dvidir = @dvidir@
exec_prefix = @exec_prefix@
host = @host@
host_alias = @host_alias@
host_cpu = @host_cpu@
host_os = @host_os@
host_vendor = @host_vendor@
htmldir = @htmldir@
includedir = @includedir@
infodir = @infodir@
install_sh = @install_sh@
libdir = @libdir@
libexecdir = @libexecdir@
localedir = @localedir@
localstatedir = @localstatedir@
mandir = @mandir@
mkdir_p = @mkdir_p@
oldincludedir = @oldincludedir@
pdfdir = @pdfdir@
prefix = @prefix@
program_transform_name = @program_transform_name@
psdir = @psdir@
runstatedir = @runstatedir@
sbindir = @sbindir@
sharedstatedir = @sharedstatedir@
srcdir = @srcdir@
sysconfdir = @sysconfdir@
target_alias = @target_alias@
top_build_prefix = @top_build_prefix@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
AM_CPPFLAGS = -I $(top_srcdir)
SUBDIRS =
noinst_LTLIBRARIES = libreodft.la
# no longer used due to numerical problems
EXTRA_DIST = reodft11e-r2hc.c redft00e-r2hc.c rodft00e-r2hc.c
libreodft_la_SOURCES = conf.c reodft.h reodft010e-r2hc.c \
reodft11e-radix2.c reodft11e-r2hc-odd.c redft00e-r2hc-pad.c \
rodft00e-r2hc-pad.c reodft00e-splitradix.c
all: all-recursive
.SUFFIXES:
.SUFFIXES: .c .lo .o .obj
$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
*$$dep*) \
( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
&& { if test -f $@; then exit 0; else break; fi; }; \
exit 1;; \
esac; \
done; \
echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu reodft/Makefile'; \
$(am__cd) $(top_srcdir) && \
$(AUTOMAKE) --gnu reodft/Makefile
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
@case '$?' in \
*config.status*) \
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
*) \
echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \
cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \
esac;
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(am__aclocal_m4_deps):
clean-noinstLTLIBRARIES:
-test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES)
@list='$(noinst_LTLIBRARIES)'; \
locs=`for p in $$list; do echo $$p; done | \
sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \
sort -u`; \
test -z "$$locs" || { \
echo rm -f $${locs}; \
rm -f $${locs}; \
}
libreodft.la: $(libreodft_la_OBJECTS) $(libreodft_la_DEPENDENCIES) $(EXTRA_libreodft_la_DEPENDENCIES)
$(AM_V_CCLD)$(LINK) $(libreodft_la_OBJECTS) $(libreodft_la_LIBADD) $(LIBS)
mostlyclean-compile:
-rm -f *.$(OBJEXT)
distclean-compile:
-rm -f *.tab.c
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/conf.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/redft00e-r2hc-pad.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/reodft00e-splitradix.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/reodft010e-r2hc.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/reodft11e-r2hc-odd.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/reodft11e-radix2.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rodft00e-r2hc-pad.Plo@am__quote@ # am--include-marker
$(am__depfiles_remade):
@$(MKDIR_P) $(@D)
@echo '# dummy' >$@-t && $(am__mv) $@-t $@
am--depfiles: $(am__depfiles_remade)
.c.o:
@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $<
.c.obj:
@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
.c.lo:
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $<
mostlyclean-libtool:
-rm -f *.lo
clean-libtool:
-rm -rf .libs _libs
# This directory's subdirectories are mostly independent; you can cd
# into them and run 'make' without going through this Makefile.
# To change the values of 'make' variables: instead of editing Makefiles,
# (1) if the variable is set in 'config.status', edit 'config.status'
# (which will cause the Makefiles to be regenerated when you run 'make');
# (2) otherwise, pass the desired values on the 'make' command line.
$(am__recursive_targets):
@fail=; \
if $(am__make_keepgoing); then \
failcom='fail=yes'; \
else \
failcom='exit 1'; \
fi; \
dot_seen=no; \
target=`echo $@ | sed s/-recursive//`; \
case "$@" in \
distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \
*) list='$(SUBDIRS)' ;; \
esac; \
for subdir in $$list; do \
echo "Making $$target in $$subdir"; \
if test "$$subdir" = "."; then \
dot_seen=yes; \
local_target="$$target-am"; \
else \
local_target="$$target"; \
fi; \
($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
|| eval $$failcom; \
done; \
if test "$$dot_seen" = "no"; then \
$(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \
fi; test -z "$$fail"
ID: $(am__tagged_files)
$(am__define_uniq_tagged_files); mkid -fID $$unique
tags: tags-recursive
TAGS: tags
tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
set x; \
here=`pwd`; \
if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \
include_option=--etags-include; \
empty_fix=.; \
else \
include_option=--include; \
empty_fix=; \
fi; \
list='$(SUBDIRS)'; for subdir in $$list; do \
if test "$$subdir" = .; then :; else \
test ! -f $$subdir/TAGS || \
set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \
fi; \
done; \
$(am__define_uniq_tagged_files); \
shift; \
if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
test -n "$$unique" || unique=$$empty_fix; \
if test $$# -gt 0; then \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
"$$@" $$unique; \
else \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
$$unique; \
fi; \
fi
ctags: ctags-recursive
CTAGS: ctags
ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
$(am__define_uniq_tagged_files); \
test -z "$(CTAGS_ARGS)$$unique" \
|| $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
$$unique
GTAGS:
here=`$(am__cd) $(top_builddir) && pwd` \
&& $(am__cd) $(top_srcdir) \
&& gtags -i $(GTAGS_ARGS) "$$here"
cscopelist: cscopelist-recursive
cscopelist-am: $(am__tagged_files)
list='$(am__tagged_files)'; \
case "$(srcdir)" in \
[\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
*) sdir=$(subdir)/$(srcdir) ;; \
esac; \
for i in $$list; do \
if test -f "$$i"; then \
echo "$(subdir)/$$i"; \
else \
echo "$$sdir/$$i"; \
fi; \
done >> $(top_builddir)/cscope.files
distclean-tags:
-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
distdir: $(BUILT_SOURCES)
$(MAKE) $(AM_MAKEFLAGS) distdir-am
distdir-am: $(DISTFILES)
@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
list='$(DISTFILES)'; \
dist_files=`for file in $$list; do echo $$file; done | \
sed -e "s|^$$srcdirstrip/||;t" \
-e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
case $$dist_files in \
*/*) $(MKDIR_P) `echo "$$dist_files" | \
sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
sort -u` ;; \
esac; \
for file in $$dist_files; do \
if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
if test -d $$d/$$file; then \
dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
if test -d "$(distdir)/$$file"; then \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
else \
test -f "$(distdir)/$$file" \
|| cp -p $$d/$$file "$(distdir)/$$file" \
|| exit 1; \
fi; \
done
@list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
if test "$$subdir" = .; then :; else \
$(am__make_dryrun) \
|| test -d "$(distdir)/$$subdir" \
|| $(MKDIR_P) "$(distdir)/$$subdir" \
|| exit 1; \
dir1=$$subdir; dir2="$(distdir)/$$subdir"; \
$(am__relativize); \
new_distdir=$$reldir; \
dir1=$$subdir; dir2="$(top_distdir)"; \
$(am__relativize); \
new_top_distdir=$$reldir; \
echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \
echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \
($(am__cd) $$subdir && \
$(MAKE) $(AM_MAKEFLAGS) \
top_distdir="$$new_top_distdir" \
distdir="$$new_distdir" \
am__remove_distdir=: \
am__skip_length_check=: \
am__skip_mode_fix=: \
distdir) \
|| exit 1; \
fi; \
done
check-am: all-am
check: check-recursive
all-am: Makefile $(LTLIBRARIES)
installdirs: installdirs-recursive
installdirs-am:
install: install-recursive
install-exec: install-exec-recursive
install-data: install-data-recursive
uninstall: uninstall-recursive
install-am: all-am
@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
installcheck: installcheck-recursive
install-strip:
if test -z '$(STRIP)'; then \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
install; \
else \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
"INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
fi
mostlyclean-generic:
clean-generic:
distclean-generic:
-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
maintainer-clean-generic:
@echo "This command is intended for maintainers to use"
@echo "it deletes files that may require special tools to rebuild."
clean: clean-recursive
clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \
mostlyclean-am
distclean: distclean-recursive
-rm -f ./$(DEPDIR)/conf.Plo
-rm -f ./$(DEPDIR)/redft00e-r2hc-pad.Plo
-rm -f ./$(DEPDIR)/reodft00e-splitradix.Plo
-rm -f ./$(DEPDIR)/reodft010e-r2hc.Plo
-rm -f ./$(DEPDIR)/reodft11e-r2hc-odd.Plo
-rm -f ./$(DEPDIR)/reodft11e-radix2.Plo
-rm -f ./$(DEPDIR)/rodft00e-r2hc-pad.Plo
-rm -f Makefile
distclean-am: clean-am distclean-compile distclean-generic \
distclean-tags
dvi: dvi-recursive
dvi-am:
html: html-recursive
html-am:
info: info-recursive
info-am:
install-data-am:
install-dvi: install-dvi-recursive
install-dvi-am:
install-exec-am:
install-html: install-html-recursive
install-html-am:
install-info: install-info-recursive
install-info-am:
install-man:
install-pdf: install-pdf-recursive
install-pdf-am:
install-ps: install-ps-recursive
install-ps-am:
installcheck-am:
maintainer-clean: maintainer-clean-recursive
-rm -f ./$(DEPDIR)/conf.Plo
-rm -f ./$(DEPDIR)/redft00e-r2hc-pad.Plo
-rm -f ./$(DEPDIR)/reodft00e-splitradix.Plo
-rm -f ./$(DEPDIR)/reodft010e-r2hc.Plo
-rm -f ./$(DEPDIR)/reodft11e-r2hc-odd.Plo
-rm -f ./$(DEPDIR)/reodft11e-radix2.Plo
-rm -f ./$(DEPDIR)/rodft00e-r2hc-pad.Plo
-rm -f Makefile
maintainer-clean-am: distclean-am maintainer-clean-generic
mostlyclean: mostlyclean-recursive
mostlyclean-am: mostlyclean-compile mostlyclean-generic \
mostlyclean-libtool
pdf: pdf-recursive
pdf-am:
ps: ps-recursive
ps-am:
uninstall-am:
.MAKE: $(am__recursive_targets) install-am install-strip
.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am \
am--depfiles check check-am clean clean-generic clean-libtool \
clean-noinstLTLIBRARIES cscopelist-am ctags ctags-am distclean \
distclean-compile distclean-generic distclean-libtool \
distclean-tags distdir dvi dvi-am html html-am info info-am \
install install-am install-data install-data-am install-dvi \
install-dvi-am install-exec install-exec-am install-html \
install-html-am install-info install-info-am install-man \
install-pdf install-pdf-am install-ps install-ps-am \
install-strip installcheck installcheck-am installdirs \
installdirs-am maintainer-clean maintainer-clean-generic \
mostlyclean mostlyclean-compile mostlyclean-generic \
mostlyclean-libtool pdf pdf-am ps ps-am tags tags-am uninstall \
uninstall-am
.PRECIOUS: Makefile
# redft00e-r2hc.c rodft00e-r2hc.c reodft11e-r2hc.c
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
.NOEXPORT:

45
fftw-3.3.10/reodft/conf.c Normal file
View File

@@ -0,0 +1,45 @@
/*
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include "reodft/reodft.h"
static const solvtab s =
{
#if 0 /* 1 to enable "standard" algorithms with substandard accuracy;
you must also add them to Makefile.am to compile these files*/
SOLVTAB(X(redft00e_r2hc_register)),
SOLVTAB(X(rodft00e_r2hc_register)),
SOLVTAB(X(reodft11e_r2hc_register)),
#endif
SOLVTAB(X(redft00e_r2hc_pad_register)),
SOLVTAB(X(rodft00e_r2hc_pad_register)),
SOLVTAB(X(reodft00e_splitradix_register)),
SOLVTAB(X(reodft010e_r2hc_register)),
SOLVTAB(X(reodft11e_radix2_r2hc_register)),
SOLVTAB(X(reodft11e_r2hc_odd_register)),
SOLVTAB_END
};
void X(reodft_conf_standard)(planner *p)
{
X(solvtab_exec)(s, p);
}

View File

@@ -0,0 +1,197 @@
/*
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
/* Do a REDFT00 problem via an R2HC problem, padded symmetrically to
twice the size. This is asymptotically a factor of ~2 worse than
redft00e-r2hc.c (the algorithm used in e.g. FFTPACK and Numerical
Recipes), but we abandoned the latter after we discovered that it
has intrinsic accuracy problems. */
#include "reodft/reodft.h"
typedef struct {
solver super;
} S;
typedef struct {
plan_rdft super;
plan *cld, *cldcpy;
INT is;
INT n;
INT vl;
INT ivs, ovs;
} P;
static void apply(const plan *ego_, R *I, R *O)
{
const P *ego = (const P *) ego_;
INT is = ego->is;
INT i, n = ego->n;
INT iv, vl = ego->vl;
INT ivs = ego->ivs, ovs = ego->ovs;
R *buf;
buf = (R *) MALLOC(sizeof(R) * (2*n), BUFFERS);
for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) {
buf[0] = I[0];
for (i = 1; i < n; ++i) {
R a = I[i * is];
buf[i] = a;
buf[2*n - i] = a;
}
buf[i] = I[i * is]; /* i == n, Nyquist */
/* r2hc transform of size 2*n */
{
plan_rdft *cld = (plan_rdft *) ego->cld;
cld->apply((plan *) cld, buf, buf);
}
/* copy n+1 real numbers (real parts of hc array) from buf to O */
{
plan_rdft *cldcpy = (plan_rdft *) ego->cldcpy;
cldcpy->apply((plan *) cldcpy, buf, O);
}
}
X(ifree)(buf);
}
static void awake(plan *ego_, enum wakefulness wakefulness)
{
P *ego = (P *) ego_;
X(plan_awake)(ego->cld, wakefulness);
X(plan_awake)(ego->cldcpy, wakefulness);
}
static void destroy(plan *ego_)
{
P *ego = (P *) ego_;
X(plan_destroy_internal)(ego->cldcpy);
X(plan_destroy_internal)(ego->cld);
}
static void print(const plan *ego_, printer *p)
{
const P *ego = (const P *) ego_;
p->print(p, "(redft00e-r2hc-pad-%D%v%(%p%)%(%p%))",
ego->n + 1, ego->vl, ego->cld, ego->cldcpy);
}
static int applicable0(const solver *ego_, const problem *p_)
{
const problem_rdft *p = (const problem_rdft *) p_;
UNUSED(ego_);
return (1
&& p->sz->rnk == 1
&& p->vecsz->rnk <= 1
&& p->kind[0] == REDFT00
&& p->sz->dims[0].n > 1 /* n == 1 is not well-defined */
);
}
static int applicable(const solver *ego, const problem *p, const planner *plnr)
{
return (!NO_SLOWP(plnr) && applicable0(ego, p));
}
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
P *pln;
const problem_rdft *p;
plan *cld = (plan *) 0, *cldcpy;
R *buf = (R *) 0;
INT n;
INT vl, ivs, ovs;
opcnt ops;
static const plan_adt padt = {
X(rdft_solve), awake, print, destroy
};
if (!applicable(ego_, p_, plnr))
goto nada;
p = (const problem_rdft *) p_;
n = p->sz->dims[0].n - 1;
A(n > 0);
buf = (R *) MALLOC(sizeof(R) * (2*n), BUFFERS);
cld = X(mkplan_d)(plnr,X(mkproblem_rdft_1_d)(X(mktensor_1d)(2*n,1,1),
X(mktensor_0d)(),
buf, buf, R2HC));
if (!cld)
goto nada;
X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs);
cldcpy =
X(mkplan_d)(plnr,
X(mkproblem_rdft_1_d)(X(mktensor_0d)(),
X(mktensor_1d)(n+1,1,
p->sz->dims[0].os),
buf, TAINT(p->O, ovs), R2HC));
if (!cldcpy)
goto nada;
X(ifree)(buf);
pln = MKPLAN_RDFT(P, &padt, apply);
pln->n = n;
pln->is = p->sz->dims[0].is;
pln->cld = cld;
pln->cldcpy = cldcpy;
pln->vl = vl;
pln->ivs = ivs;
pln->ovs = ovs;
X(ops_zero)(&ops);
ops.other = n + 2*n; /* loads + stores (input -> buf) */
X(ops_zero)(&pln->super.super.ops);
X(ops_madd2)(pln->vl, &ops, &pln->super.super.ops);
X(ops_madd2)(pln->vl, &cld->ops, &pln->super.super.ops);
X(ops_madd2)(pln->vl, &cldcpy->ops, &pln->super.super.ops);
return &(pln->super.super);
nada:
X(ifree0)(buf);
if (cld)
X(plan_destroy_internal)(cld);
return (plan *)0;
}
/* constructor */
static solver *mksolver(void)
{
static const solver_adt sadt = { PROBLEM_RDFT, mkplan, 0 };
S *slv = MKSOLVER(S, &sadt);
return &(slv->super);
}
void X(redft00e_r2hc_pad_register)(planner *p)
{
REGISTER_SOLVER(p, mksolver());
}

View File

@@ -0,0 +1,215 @@
/*
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
/* Do a REDFT00 problem via an R2HC problem, with some pre/post-processing.
This code uses the trick from FFTPACK, also documented in a similar
form by Numerical Recipes. Unfortunately, this algorithm seems to
have intrinsic numerical problems (similar to those in
reodft11e-r2hc.c), possibly due to the fact that it multiplies its
input by a cosine, causing a loss of precision near the zero. For
transforms of 16k points, it has already lost three or four decimal
places of accuracy, which we deem unacceptable.
So, we have abandoned this algorithm in favor of the one in
redft00-r2hc-pad.c, which unfortunately sacrifices 30-50% in speed.
The only other alternative in the literature that does not have
similar numerical difficulties seems to be the direct adaptation of
the Cooley-Tukey decomposition for symmetric data, but this would
require a whole new set of codelets and it's not clear that it's
worth it at this point. However, we did implement the latter
algorithm for the specific case of odd n (logically adapting the
split-radix algorithm); see reodft00e-splitradix.c. */
#include "reodft/reodft.h"
typedef struct {
solver super;
} S;
typedef struct {
plan_rdft super;
plan *cld;
twid *td;
INT is, os;
INT n;
INT vl;
INT ivs, ovs;
} P;
static void apply(const plan *ego_, R *I, R *O)
{
const P *ego = (const P *) ego_;
INT is = ego->is, os = ego->os;
INT i, n = ego->n;
INT iv, vl = ego->vl;
INT ivs = ego->ivs, ovs = ego->ovs;
R *W = ego->td->W;
R *buf;
E csum;
buf = (R *) MALLOC(sizeof(R) * n, BUFFERS);
for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) {
buf[0] = I[0] + I[is * n];
csum = I[0] - I[is * n];
for (i = 1; i < n - i; ++i) {
E a, b, apb, amb;
a = I[is * i];
b = I[is * (n - i)];
csum += W[2*i] * (amb = K(2.0)*(a - b));
amb = W[2*i+1] * amb;
apb = (a + b);
buf[i] = apb - amb;
buf[n - i] = apb + amb;
}
if (i == n - i) {
buf[i] = K(2.0) * I[is * i];
}
{
plan_rdft *cld = (plan_rdft *) ego->cld;
cld->apply((plan *) cld, buf, buf);
}
/* FIXME: use recursive/cascade summation for better stability? */
O[0] = buf[0];
O[os] = csum;
for (i = 1; i + i < n; ++i) {
INT k = i + i;
O[os * k] = buf[i];
O[os * (k + 1)] = O[os * (k - 1)] - buf[n - i];
}
if (i + i == n) {
O[os * n] = buf[i];
}
}
X(ifree)(buf);
}
static void awake(plan *ego_, enum wakefulness wakefulness)
{
P *ego = (P *) ego_;
static const tw_instr redft00e_tw[] = {
{ TW_COS, 0, 1 },
{ TW_SIN, 0, 1 },
{ TW_NEXT, 1, 0 }
};
X(plan_awake)(ego->cld, wakefulness);
X(twiddle_awake)(wakefulness,
&ego->td, redft00e_tw, 2*ego->n, 1, (ego->n+1)/2);
}
static void destroy(plan *ego_)
{
P *ego = (P *) ego_;
X(plan_destroy_internal)(ego->cld);
}
static void print(const plan *ego_, printer *p)
{
const P *ego = (const P *) ego_;
p->print(p, "(redft00e-r2hc-%D%v%(%p%))", ego->n + 1, ego->vl, ego->cld);
}
static int applicable0(const solver *ego_, const problem *p_)
{
const problem_rdft *p = (const problem_rdft *) p_;
UNUSED(ego_);
return (1
&& p->sz->rnk == 1
&& p->vecsz->rnk <= 1
&& p->kind[0] == REDFT00
&& p->sz->dims[0].n > 1 /* n == 1 is not well-defined */
);
}
static int applicable(const solver *ego, const problem *p, const planner *plnr)
{
return (!NO_SLOWP(plnr) && applicable0(ego, p));
}
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
P *pln;
const problem_rdft *p;
plan *cld;
R *buf;
INT n;
opcnt ops;
static const plan_adt padt = {
X(rdft_solve), awake, print, destroy
};
if (!applicable(ego_, p_, plnr))
return (plan *)0;
p = (const problem_rdft *) p_;
n = p->sz->dims[0].n - 1;
A(n > 0);
buf = (R *) MALLOC(sizeof(R) * n, BUFFERS);
cld = X(mkplan_d)(plnr, X(mkproblem_rdft_1_d)(X(mktensor_1d)(n, 1, 1),
X(mktensor_0d)(),
buf, buf, R2HC));
X(ifree)(buf);
if (!cld)
return (plan *)0;
pln = MKPLAN_RDFT(P, &padt, apply);
pln->n = n;
pln->is = p->sz->dims[0].is;
pln->os = p->sz->dims[0].os;
pln->cld = cld;
pln->td = 0;
X(tensor_tornk1)(p->vecsz, &pln->vl, &pln->ivs, &pln->ovs);
X(ops_zero)(&ops);
ops.other = 8 + (n-1)/2 * 11 + (1 - n % 2) * 5;
ops.add = 2 + (n-1)/2 * 5;
ops.mul = (n-1)/2 * 3 + (1 - n % 2) * 1;
X(ops_zero)(&pln->super.super.ops);
X(ops_madd2)(pln->vl, &ops, &pln->super.super.ops);
X(ops_madd2)(pln->vl, &cld->ops, &pln->super.super.ops);
return &(pln->super.super);
}
/* constructor */
static solver *mksolver(void)
{
static const solver_adt sadt = { PROBLEM_RDFT, mkplan, 0 };
S *slv = MKSOLVER(S, &sadt);
return &(slv->super);
}
void X(redft00e_r2hc_register)(planner *p)
{
REGISTER_SOLVER(p, mksolver());
}

View File

@@ -0,0 +1,42 @@
/*
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#ifndef __REODFT_H__
#define __REODFT_H__
#include "kernel/ifftw.h"
#include "rdft/rdft.h"
#define REODFT_KINDP(k) ((k) >= REDFT00 && (k) <= RODFT11)
void X(redft00e_r2hc_register)(planner *p);
void X(redft00e_r2hc_pad_register)(planner *p);
void X(rodft00e_r2hc_register)(planner *p);
void X(rodft00e_r2hc_pad_register)(planner *p);
void X(reodft00e_splitradix_register)(planner *p);
void X(reodft010e_r2hc_register)(planner *p);
void X(reodft11e_r2hc_register)(planner *p);
void X(reodft11e_radix2_r2hc_register)(planner *p);
void X(reodft11e_r2hc_odd_register)(planner *p);
/* configurations */
void X(reodft_conf_standard)(planner *p);
#endif /* __REODFT_H__ */

View File

@@ -0,0 +1,354 @@
/*
* Copyright (c) 2005 Matteo Frigo
* Copyright (c) 2005 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
/* Do an R{E,O}DFT00 problem (of an odd length n) recursively via an
R{E,O}DFT00 problem and an RDFT problem of half the length.
This works by "logically" expanding the array to a real-even/odd DFT of
length 2n-/+2 and then applying the split-radix algorithm.
In this way, we can avoid having to pad to twice the length
(ala redft00-r2hc-pad), saving a factor of ~2 for n=2^m+/-1,
but don't incur the accuracy loss that the "ordinary" algorithm
sacrifices (ala redft00-r2hc.c).
*/
#include "reodft/reodft.h"
typedef struct {
solver super;
} S;
typedef struct {
plan_rdft super;
plan *clde, *cldo;
twid *td;
INT is, os;
INT n;
INT vl;
INT ivs, ovs;
} P;
/* redft00 */
static void apply_e(const plan *ego_, R *I, R *O)
{
const P *ego = (const P *) ego_;
INT is = ego->is, os = ego->os;
INT i, j, n = ego->n + 1, n2 = (n-1)/2;
INT iv, vl = ego->vl;
INT ivs = ego->ivs, ovs = ego->ovs;
R *W = ego->td->W - 2;
R *buf;
buf = (R *) MALLOC(sizeof(R) * n2, BUFFERS);
for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) {
/* do size (n-1)/2 r2hc transform of odd-indexed elements
with stride 4, "wrapping around" end of array with even
boundary conditions */
for (j = 0, i = 1; i < n; i += 4)
buf[j++] = I[is * i];
for (i = 2*n-2-i; i > 0; i -= 4)
buf[j++] = I[is * i];
{
plan_rdft *cld = (plan_rdft *) ego->cldo;
cld->apply((plan *) cld, buf, buf);
}
/* do size (n+1)/2 redft00 of the even-indexed elements,
writing to O: */
{
plan_rdft *cld = (plan_rdft *) ego->clde;
cld->apply((plan *) cld, I, O);
}
/* combine the results with the twiddle factors to get output */
{ /* DC element */
E b20 = O[0], b0 = K(2.0) * buf[0];
O[0] = b20 + b0;
O[2*(n2*os)] = b20 - b0;
/* O[n2*os] = O[n2*os]; */
}
for (i = 1; i < n2 - i; ++i) {
E ap, am, br, bi, wr, wi, wbr, wbi;
br = buf[i];
bi = buf[n2 - i];
wr = W[2*i];
wi = W[2*i+1];
#if FFT_SIGN == -1
wbr = K(2.0) * (wr*br + wi*bi);
wbi = K(2.0) * (wr*bi - wi*br);
#else
wbr = K(2.0) * (wr*br - wi*bi);
wbi = K(2.0) * (wr*bi + wi*br);
#endif
ap = O[i*os];
O[i*os] = ap + wbr;
O[(2*n2 - i)*os] = ap - wbr;
am = O[(n2 - i)*os];
#if FFT_SIGN == -1
O[(n2 - i)*os] = am - wbi;
O[(n2 + i)*os] = am + wbi;
#else
O[(n2 - i)*os] = am + wbi;
O[(n2 + i)*os] = am - wbi;
#endif
}
if (i == n2 - i) { /* Nyquist element */
E ap, wbr;
wbr = K(2.0) * (W[2*i] * buf[i]);
ap = O[i*os];
O[i*os] = ap + wbr;
O[(2*n2 - i)*os] = ap - wbr;
}
}
X(ifree)(buf);
}
/* rodft00 */
static void apply_o(const plan *ego_, R *I, R *O)
{
const P *ego = (const P *) ego_;
INT is = ego->is, os = ego->os;
INT i, j, n = ego->n - 1, n2 = (n+1)/2;
INT iv, vl = ego->vl;
INT ivs = ego->ivs, ovs = ego->ovs;
R *W = ego->td->W - 2;
R *buf;
buf = (R *) MALLOC(sizeof(R) * n2, BUFFERS);
for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) {
/* do size (n+1)/2 r2hc transform of even-indexed elements
with stride 4, "wrapping around" end of array with odd
boundary conditions */
for (j = 0, i = 0; i < n; i += 4)
buf[j++] = I[is * i];
for (i = 2*n-i; i > 0; i -= 4)
buf[j++] = -I[is * i];
{
plan_rdft *cld = (plan_rdft *) ego->cldo;
cld->apply((plan *) cld, buf, buf);
}
/* do size (n-1)/2 rodft00 of the odd-indexed elements,
writing to O: */
{
plan_rdft *cld = (plan_rdft *) ego->clde;
if (I == O) {
/* can't use I+is and I, subplan would lose in-placeness */
cld->apply((plan *) cld, I + is, I + is);
/* we could maybe avoid this copy by modifying the
twiddle loop, but currently I can't be bothered. */
A(is >= os);
for (i = 0; i < n2-1; ++i)
O[os*i] = I[is*(i+1)];
}
else
cld->apply((plan *) cld, I + is, O);
}
/* combine the results with the twiddle factors to get output */
O[(n2-1)*os] = K(2.0) * buf[0];
for (i = 1; i < n2 - i; ++i) {
E ap, am, br, bi, wr, wi, wbr, wbi;
br = buf[i];
bi = buf[n2 - i];
wr = W[2*i];
wi = W[2*i+1];
#if FFT_SIGN == -1
wbr = K(2.0) * (wr*br + wi*bi);
wbi = K(2.0) * (wi*br - wr*bi);
#else
wbr = K(2.0) * (wr*br - wi*bi);
wbi = K(2.0) * (wr*bi + wi*br);
#endif
ap = O[(i-1)*os];
O[(i-1)*os] = wbi + ap;
O[(2*n2-1 - i)*os] = wbi - ap;
am = O[(n2-1 - i)*os];
#if FFT_SIGN == -1
O[(n2-1 - i)*os] = wbr + am;
O[(n2-1 + i)*os] = wbr - am;
#else
O[(n2-1 - i)*os] = wbr + am;
O[(n2-1 + i)*os] = wbr - am;
#endif
}
if (i == n2 - i) { /* Nyquist element */
E ap, wbi;
wbi = K(2.0) * (W[2*i+1] * buf[i]);
ap = O[(i-1)*os];
O[(i-1)*os] = wbi + ap;
O[(2*n2-1 - i)*os] = wbi - ap;
}
}
X(ifree)(buf);
}
static void awake(plan *ego_, enum wakefulness wakefulness)
{
P *ego = (P *) ego_;
static const tw_instr reodft00e_tw[] = {
{ TW_COS, 1, 1 },
{ TW_SIN, 1, 1 },
{ TW_NEXT, 1, 0 }
};
X(plan_awake)(ego->clde, wakefulness);
X(plan_awake)(ego->cldo, wakefulness);
X(twiddle_awake)(wakefulness, &ego->td, reodft00e_tw,
2*ego->n, 1, ego->n/4);
}
static void destroy(plan *ego_)
{
P *ego = (P *) ego_;
X(plan_destroy_internal)(ego->cldo);
X(plan_destroy_internal)(ego->clde);
}
static void print(const plan *ego_, printer *p)
{
const P *ego = (const P *) ego_;
if (ego->super.apply == apply_e)
p->print(p, "(redft00e-splitradix-%D%v%(%p%)%(%p%))",
ego->n + 1, ego->vl, ego->clde, ego->cldo);
else
p->print(p, "(rodft00e-splitradix-%D%v%(%p%)%(%p%))",
ego->n - 1, ego->vl, ego->clde, ego->cldo);
}
static int applicable0(const solver *ego_, const problem *p_)
{
const problem_rdft *p = (const problem_rdft *) p_;
UNUSED(ego_);
return (1
&& p->sz->rnk == 1
&& p->vecsz->rnk <= 1
&& (p->kind[0] == REDFT00 || p->kind[0] == RODFT00)
&& p->sz->dims[0].n > 1 /* don't create size-0 sub-plans */
&& p->sz->dims[0].n % 2 /* odd: 4 divides "logical" DFT */
&& (p->I != p->O || p->vecsz->rnk == 0
|| p->vecsz->dims[0].is == p->vecsz->dims[0].os)
&& (p->kind[0] != RODFT00 || p->I != p->O ||
p->sz->dims[0].is >= p->sz->dims[0].os) /* laziness */
);
}
static int applicable(const solver *ego, const problem *p, const planner *plnr)
{
return (!NO_SLOWP(plnr) && applicable0(ego, p));
}
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
P *pln;
const problem_rdft *p;
plan *clde, *cldo;
R *buf;
INT n, n0;
opcnt ops;
int inplace_odd;
static const plan_adt padt = {
X(rdft_solve), awake, print, destroy
};
if (!applicable(ego_, p_, plnr))
return (plan *)0;
p = (const problem_rdft *) p_;
n = (n0 = p->sz->dims[0].n) + (p->kind[0] == REDFT00 ? (INT)-1 : (INT)1);
A(n > 0 && n % 2 == 0);
buf = (R *) MALLOC(sizeof(R) * (n/2), BUFFERS);
inplace_odd = p->kind[0]==RODFT00 && p->I == p->O;
clde = X(mkplan_d)(plnr, X(mkproblem_rdft_1_d)(
X(mktensor_1d)(n0-n/2, 2*p->sz->dims[0].is,
inplace_odd ? p->sz->dims[0].is
: p->sz->dims[0].os),
X(mktensor_0d)(),
TAINT(p->I
+ p->sz->dims[0].is * (p->kind[0]==RODFT00),
p->vecsz->rnk ? p->vecsz->dims[0].is : 0),
TAINT(p->O
+ p->sz->dims[0].is * inplace_odd,
p->vecsz->rnk ? p->vecsz->dims[0].os : 0),
p->kind[0]));
if (!clde) {
X(ifree)(buf);
return (plan *)0;
}
cldo = X(mkplan_d)(plnr, X(mkproblem_rdft_1_d)(
X(mktensor_1d)(n/2, 1, 1),
X(mktensor_0d)(),
buf, buf, R2HC));
X(ifree)(buf);
if (!cldo)
return (plan *)0;
pln = MKPLAN_RDFT(P, &padt, p->kind[0] == REDFT00 ? apply_e : apply_o);
pln->n = n;
pln->is = p->sz->dims[0].is;
pln->os = p->sz->dims[0].os;
pln->clde = clde;
pln->cldo = cldo;
pln->td = 0;
X(tensor_tornk1)(p->vecsz, &pln->vl, &pln->ivs, &pln->ovs);
X(ops_zero)(&ops);
ops.other = n/2;
ops.add = (p->kind[0]==REDFT00 ? (INT)2 : (INT)0) +
(n/2-1)/2 * 6 + ((n/2)%2==0) * 2;
ops.mul = 1 + (n/2-1)/2 * 6 + ((n/2)%2==0) * 2;
/* tweak ops.other so that r2hc-pad is used for small sizes, which
seems to be a lot faster on my machine: */
ops.other += 256;
X(ops_zero)(&pln->super.super.ops);
X(ops_madd2)(pln->vl, &ops, &pln->super.super.ops);
X(ops_madd2)(pln->vl, &clde->ops, &pln->super.super.ops);
X(ops_madd2)(pln->vl, &cldo->ops, &pln->super.super.ops);
return &(pln->super.super);
}
/* constructor */
static solver *mksolver(void)
{
static const solver_adt sadt = { PROBLEM_RDFT, mkplan, 0 };
S *slv = MKSOLVER(S, &sadt);
return &(slv->super);
}
void X(reodft00e_splitradix_register)(planner *p)
{
REGISTER_SOLVER(p, mksolver());
}

View File

@@ -0,0 +1,410 @@
/*
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
/* Do an R{E,O}DFT{01,10} problem via an R2HC problem, with some
pre/post-processing ala FFTPACK. */
#include "reodft/reodft.h"
typedef struct {
solver super;
} S;
typedef struct {
plan_rdft super;
plan *cld;
twid *td;
INT is, os;
INT n;
INT vl;
INT ivs, ovs;
rdft_kind kind;
} P;
/* A real-even-01 DFT operates logically on a size-4N array:
I 0 -r(I*) -I 0 r(I*),
where r denotes reversal and * denotes deletion of the 0th element.
To compute the transform of this, we imagine performing a radix-4
(real-input) DIF step, which turns the size-4N DFT into 4 size-N
(contiguous) DFTs, two of which are zero and two of which are
conjugates. The non-redundant size-N DFT has halfcomplex input, so
we can do it with a size-N hc2r transform. (In order to share
plans with the re10 (inverse) transform, however, we use the DHT
trick to re-express the hc2r problem as r2hc. This has little cost
since we are already pre- and post-processing the data in {i,n-i}
order.) Finally, we have to write out the data in the correct
order...the two size-N redundant (conjugate) hc2r DFTs correspond
to the even and odd outputs in O (i.e. the usual interleaved output
of DIF transforms); since this data has even symmetry, we only
write the first half of it.
The real-even-10 DFT is just the reverse of these steps, i.e. a
radix-4 DIT transform. There, however, we just use the r2hc
transform naturally without resorting to the DHT trick.
A real-odd-01 DFT is very similar, except that the input is
0 I (rI)* 0 -I -(rI)*. This format, however, can be transformed
into precisely the real-even-01 format above by sending I -> rI
and shifting the array by N. The former swap is just another
transformation on the input during preprocessing; the latter
multiplies the even/odd outputs by i/-i, which combines with
the factor of -i (to take the imaginary part) to simply flip
the sign of the odd outputs. Vice-versa for real-odd-10.
The FFTPACK source code was very helpful in working this out.
(They do unnecessary passes over the array, though.) The same
algorithm is also described in:
John Makhoul, "A fast cosine transform in one and two dimensions,"
IEEE Trans. on Acoust. Speech and Sig. Proc., ASSP-28 (1), 27--34 (1980).
Note that Numerical Recipes suggests a different algorithm that
requires more operations and uses trig. functions for both the pre-
and post-processing passes.
*/
static void apply_re01(const plan *ego_, R *I, R *O)
{
const P *ego = (const P *) ego_;
INT is = ego->is, os = ego->os;
INT i, n = ego->n;
INT iv, vl = ego->vl;
INT ivs = ego->ivs, ovs = ego->ovs;
R *W = ego->td->W;
R *buf;
buf = (R *) MALLOC(sizeof(R) * n, BUFFERS);
for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) {
buf[0] = I[0];
for (i = 1; i < n - i; ++i) {
E a, b, apb, amb, wa, wb;
a = I[is * i];
b = I[is * (n - i)];
apb = a + b;
amb = a - b;
wa = W[2*i];
wb = W[2*i + 1];
buf[i] = wa * amb + wb * apb;
buf[n - i] = wa * apb - wb * amb;
}
if (i == n - i) {
buf[i] = K(2.0) * I[is * i] * W[2*i];
}
{
plan_rdft *cld = (plan_rdft *) ego->cld;
cld->apply((plan *) cld, buf, buf);
}
O[0] = buf[0];
for (i = 1; i < n - i; ++i) {
E a, b;
INT k;
a = buf[i];
b = buf[n - i];
k = i + i;
O[os * (k - 1)] = a - b;
O[os * k] = a + b;
}
if (i == n - i) {
O[os * (n - 1)] = buf[i];
}
}
X(ifree)(buf);
}
/* ro01 is same as re01, but with i <-> n - 1 - i in the input and
the sign of the odd output elements flipped. */
static void apply_ro01(const plan *ego_, R *I, R *O)
{
const P *ego = (const P *) ego_;
INT is = ego->is, os = ego->os;
INT i, n = ego->n;
INT iv, vl = ego->vl;
INT ivs = ego->ivs, ovs = ego->ovs;
R *W = ego->td->W;
R *buf;
buf = (R *) MALLOC(sizeof(R) * n, BUFFERS);
for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) {
buf[0] = I[is * (n - 1)];
for (i = 1; i < n - i; ++i) {
E a, b, apb, amb, wa, wb;
a = I[is * (n - 1 - i)];
b = I[is * (i - 1)];
apb = a + b;
amb = a - b;
wa = W[2*i];
wb = W[2*i+1];
buf[i] = wa * amb + wb * apb;
buf[n - i] = wa * apb - wb * amb;
}
if (i == n - i) {
buf[i] = K(2.0) * I[is * (i - 1)] * W[2*i];
}
{
plan_rdft *cld = (plan_rdft *) ego->cld;
cld->apply((plan *) cld, buf, buf);
}
O[0] = buf[0];
for (i = 1; i < n - i; ++i) {
E a, b;
INT k;
a = buf[i];
b = buf[n - i];
k = i + i;
O[os * (k - 1)] = b - a;
O[os * k] = a + b;
}
if (i == n - i) {
O[os * (n - 1)] = -buf[i];
}
}
X(ifree)(buf);
}
static void apply_re10(const plan *ego_, R *I, R *O)
{
const P *ego = (const P *) ego_;
INT is = ego->is, os = ego->os;
INT i, n = ego->n;
INT iv, vl = ego->vl;
INT ivs = ego->ivs, ovs = ego->ovs;
R *W = ego->td->W;
R *buf;
buf = (R *) MALLOC(sizeof(R) * n, BUFFERS);
for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) {
buf[0] = I[0];
for (i = 1; i < n - i; ++i) {
E u, v;
INT k = i + i;
u = I[is * (k - 1)];
v = I[is * k];
buf[n - i] = u;
buf[i] = v;
}
if (i == n - i) {
buf[i] = I[is * (n - 1)];
}
{
plan_rdft *cld = (plan_rdft *) ego->cld;
cld->apply((plan *) cld, buf, buf);
}
O[0] = K(2.0) * buf[0];
for (i = 1; i < n - i; ++i) {
E a, b, wa, wb;
a = K(2.0) * buf[i];
b = K(2.0) * buf[n - i];
wa = W[2*i];
wb = W[2*i + 1];
O[os * i] = wa * a + wb * b;
O[os * (n - i)] = wb * a - wa * b;
}
if (i == n - i) {
O[os * i] = K(2.0) * buf[i] * W[2*i];
}
}
X(ifree)(buf);
}
/* ro10 is same as re10, but with i <-> n - 1 - i in the output and
the sign of the odd input elements flipped. */
static void apply_ro10(const plan *ego_, R *I, R *O)
{
const P *ego = (const P *) ego_;
INT is = ego->is, os = ego->os;
INT i, n = ego->n;
INT iv, vl = ego->vl;
INT ivs = ego->ivs, ovs = ego->ovs;
R *W = ego->td->W;
R *buf;
buf = (R *) MALLOC(sizeof(R) * n, BUFFERS);
for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) {
buf[0] = I[0];
for (i = 1; i < n - i; ++i) {
E u, v;
INT k = i + i;
u = -I[is * (k - 1)];
v = I[is * k];
buf[n - i] = u;
buf[i] = v;
}
if (i == n - i) {
buf[i] = -I[is * (n - 1)];
}
{
plan_rdft *cld = (plan_rdft *) ego->cld;
cld->apply((plan *) cld, buf, buf);
}
O[os * (n - 1)] = K(2.0) * buf[0];
for (i = 1; i < n - i; ++i) {
E a, b, wa, wb;
a = K(2.0) * buf[i];
b = K(2.0) * buf[n - i];
wa = W[2*i];
wb = W[2*i + 1];
O[os * (n - 1 - i)] = wa * a + wb * b;
O[os * (i - 1)] = wb * a - wa * b;
}
if (i == n - i) {
O[os * (i - 1)] = K(2.0) * buf[i] * W[2*i];
}
}
X(ifree)(buf);
}
static void awake(plan *ego_, enum wakefulness wakefulness)
{
P *ego = (P *) ego_;
static const tw_instr reodft010e_tw[] = {
{ TW_COS, 0, 1 },
{ TW_SIN, 0, 1 },
{ TW_NEXT, 1, 0 }
};
X(plan_awake)(ego->cld, wakefulness);
X(twiddle_awake)(wakefulness, &ego->td, reodft010e_tw,
4*ego->n, 1, ego->n/2+1);
}
static void destroy(plan *ego_)
{
P *ego = (P *) ego_;
X(plan_destroy_internal)(ego->cld);
}
static void print(const plan *ego_, printer *p)
{
const P *ego = (const P *) ego_;
p->print(p, "(%se-r2hc-%D%v%(%p%))",
X(rdft_kind_str)(ego->kind), ego->n, ego->vl, ego->cld);
}
static int applicable0(const solver *ego_, const problem *p_)
{
const problem_rdft *p = (const problem_rdft *) p_;
UNUSED(ego_);
return (1
&& p->sz->rnk == 1
&& p->vecsz->rnk <= 1
&& (p->kind[0] == REDFT01 || p->kind[0] == REDFT10
|| p->kind[0] == RODFT01 || p->kind[0] == RODFT10)
);
}
static int applicable(const solver *ego, const problem *p, const planner *plnr)
{
return (!NO_SLOWP(plnr) && applicable0(ego, p));
}
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
P *pln;
const problem_rdft *p;
plan *cld;
R *buf;
INT n;
opcnt ops;
static const plan_adt padt = {
X(rdft_solve), awake, print, destroy
};
if (!applicable(ego_, p_, plnr))
return (plan *)0;
p = (const problem_rdft *) p_;
n = p->sz->dims[0].n;
buf = (R *) MALLOC(sizeof(R) * n, BUFFERS);
cld = X(mkplan_d)(plnr, X(mkproblem_rdft_1_d)(X(mktensor_1d)(n, 1, 1),
X(mktensor_0d)(),
buf, buf, R2HC));
X(ifree)(buf);
if (!cld)
return (plan *)0;
switch (p->kind[0]) {
case REDFT01: pln = MKPLAN_RDFT(P, &padt, apply_re01); break;
case REDFT10: pln = MKPLAN_RDFT(P, &padt, apply_re10); break;
case RODFT01: pln = MKPLAN_RDFT(P, &padt, apply_ro01); break;
case RODFT10: pln = MKPLAN_RDFT(P, &padt, apply_ro10); break;
default: A(0); return (plan*)0;
}
pln->n = n;
pln->is = p->sz->dims[0].is;
pln->os = p->sz->dims[0].os;
pln->cld = cld;
pln->td = 0;
pln->kind = p->kind[0];
X(tensor_tornk1)(p->vecsz, &pln->vl, &pln->ivs, &pln->ovs);
X(ops_zero)(&ops);
ops.other = 4 + (n-1)/2 * 10 + (1 - n % 2) * 5;
if (p->kind[0] == REDFT01 || p->kind[0] == RODFT01) {
ops.add = (n-1)/2 * 6;
ops.mul = (n-1)/2 * 4 + (1 - n % 2) * 2;
}
else { /* 10 transforms */
ops.add = (n-1)/2 * 2;
ops.mul = 1 + (n-1)/2 * 6 + (1 - n % 2) * 2;
}
X(ops_zero)(&pln->super.super.ops);
X(ops_madd2)(pln->vl, &ops, &pln->super.super.ops);
X(ops_madd2)(pln->vl, &cld->ops, &pln->super.super.ops);
return &(pln->super.super);
}
/* constructor */
static solver *mksolver(void)
{
static const solver_adt sadt = { PROBLEM_RDFT, mkplan, 0 };
S *slv = MKSOLVER(S, &sadt);
return &(slv->super);
}
void X(reodft010e_r2hc_register)(planner *p)
{
REGISTER_SOLVER(p, mksolver());
}

View File

@@ -0,0 +1,300 @@
/*
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
/* Do an R{E,O}DFT11 problem via an R2HC problem of the same *odd* size,
with some permutations and post-processing, as described in:
S. C. Chan and K. L. Ho, "Fast algorithms for computing the
discrete cosine transform," IEEE Trans. Circuits Systems II:
Analog & Digital Sig. Proc. 39 (3), 185--190 (1992).
(For even sizes, see reodft11e-radix2.c.)
This algorithm is related to the 8 x n prime-factor-algorithm (PFA)
decomposition of the size 8n "logical" DFT corresponding to the
R{EO}DFT11.
Aside from very confusing notation (several symbols are redefined
from one line to the next), be aware that this paper has some
errors. In particular, the signs are wrong in Eqs. (34-35). Also,
Eqs. (36-37) should be simply C(k) = C(2k + 1 mod N), and similarly
for S (or, equivalently, the second cases should have 2*N - 2*k - 1
instead of N - k - 1). Note also that in their definition of the
DFT, similarly to FFTW's, the exponent's sign is -1, but they
forgot to correspondingly multiply S (the sine terms) by -1.
*/
#include "reodft/reodft.h"
typedef struct {
solver super;
} S;
typedef struct {
plan_rdft super;
plan *cld;
INT is, os;
INT n;
INT vl;
INT ivs, ovs;
rdft_kind kind;
} P;
static DK(SQRT2, +1.4142135623730950488016887242096980785696718753769);
#define SGN_SET(x, i) ((i) % 2 ? -(x) : (x))
static void apply_re11(const plan *ego_, R *I, R *O)
{
const P *ego = (const P *) ego_;
INT is = ego->is, os = ego->os;
INT i, n = ego->n, n2 = n/2;
INT iv, vl = ego->vl;
INT ivs = ego->ivs, ovs = ego->ovs;
R *buf;
buf = (R *) MALLOC(sizeof(R) * n, BUFFERS);
for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) {
{
INT m;
for (i = 0, m = n2; m < n; ++i, m += 4)
buf[i] = I[is * m];
for (; m < 2 * n; ++i, m += 4)
buf[i] = -I[is * (2*n - m - 1)];
for (; m < 3 * n; ++i, m += 4)
buf[i] = -I[is * (m - 2*n)];
for (; m < 4 * n; ++i, m += 4)
buf[i] = I[is * (4*n - m - 1)];
m -= 4 * n;
for (; i < n; ++i, m += 4)
buf[i] = I[is * m];
}
{ /* child plan: R2HC of size n */
plan_rdft *cld = (plan_rdft *) ego->cld;
cld->apply((plan *) cld, buf, buf);
}
/* FIXME: strength-reduce loop by 4 to eliminate ugly sgn_set? */
for (i = 0; i + i + 1 < n2; ++i) {
INT k = i + i + 1;
E c1, s1;
E c2, s2;
c1 = buf[k];
c2 = buf[k + 1];
s2 = buf[n - (k + 1)];
s1 = buf[n - k];
O[os * i] = SQRT2 * (SGN_SET(c1, (i+1)/2) +
SGN_SET(s1, i/2));
O[os * (n - (i+1))] = SQRT2 * (SGN_SET(c1, (n-i)/2) -
SGN_SET(s1, (n-(i+1))/2));
O[os * (n2 - (i+1))] = SQRT2 * (SGN_SET(c2, (n2-i)/2) -
SGN_SET(s2, (n2-(i+1))/2));
O[os * (n2 + (i+1))] = SQRT2 * (SGN_SET(c2, (n2+i+2)/2) +
SGN_SET(s2, (n2+(i+1))/2));
}
if (i + i + 1 == n2) {
E c, s;
c = buf[n2];
s = buf[n - n2];
O[os * i] = SQRT2 * (SGN_SET(c, (i+1)/2) +
SGN_SET(s, i/2));
O[os * (n - (i+1))] = SQRT2 * (SGN_SET(c, (i+2)/2) +
SGN_SET(s, (i+1)/2));
}
O[os * n2] = SQRT2 * SGN_SET(buf[0], (n2+1)/2);
}
X(ifree)(buf);
}
/* like for rodft01, rodft11 is obtained from redft11 by
reversing the input and flipping the sign of every other output. */
static void apply_ro11(const plan *ego_, R *I, R *O)
{
const P *ego = (const P *) ego_;
INT is = ego->is, os = ego->os;
INT i, n = ego->n, n2 = n/2;
INT iv, vl = ego->vl;
INT ivs = ego->ivs, ovs = ego->ovs;
R *buf;
buf = (R *) MALLOC(sizeof(R) * n, BUFFERS);
for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) {
{
INT m;
for (i = 0, m = n2; m < n; ++i, m += 4)
buf[i] = I[is * (n - 1 - m)];
for (; m < 2 * n; ++i, m += 4)
buf[i] = -I[is * (m - n)];
for (; m < 3 * n; ++i, m += 4)
buf[i] = -I[is * (3*n - 1 - m)];
for (; m < 4 * n; ++i, m += 4)
buf[i] = I[is * (m - 3*n)];
m -= 4 * n;
for (; i < n; ++i, m += 4)
buf[i] = I[is * (n - 1 - m)];
}
{ /* child plan: R2HC of size n */
plan_rdft *cld = (plan_rdft *) ego->cld;
cld->apply((plan *) cld, buf, buf);
}
/* FIXME: strength-reduce loop by 4 to eliminate ugly sgn_set? */
for (i = 0; i + i + 1 < n2; ++i) {
INT k = i + i + 1;
INT j;
E c1, s1;
E c2, s2;
c1 = buf[k];
c2 = buf[k + 1];
s2 = buf[n - (k + 1)];
s1 = buf[n - k];
O[os * i] = SQRT2 * (SGN_SET(c1, (i+1)/2 + i) +
SGN_SET(s1, i/2 + i));
O[os * (n - (i+1))] = SQRT2 * (SGN_SET(c1, (n-i)/2 + i) -
SGN_SET(s1, (n-(i+1))/2 + i));
j = n2 - (i+1);
O[os * j] = SQRT2 * (SGN_SET(c2, (n2-i)/2 + j) -
SGN_SET(s2, (n2-(i+1))/2 + j));
O[os * (n2 + (i+1))] = SQRT2 * (SGN_SET(c2, (n2+i+2)/2 + j) +
SGN_SET(s2, (n2+(i+1))/2 + j));
}
if (i + i + 1 == n2) {
E c, s;
c = buf[n2];
s = buf[n - n2];
O[os * i] = SQRT2 * (SGN_SET(c, (i+1)/2 + i) +
SGN_SET(s, i/2 + i));
O[os * (n - (i+1))] = SQRT2 * (SGN_SET(c, (i+2)/2 + i) +
SGN_SET(s, (i+1)/2 + i));
}
O[os * n2] = SQRT2 * SGN_SET(buf[0], (n2+1)/2 + n2);
}
X(ifree)(buf);
}
static void awake(plan *ego_, enum wakefulness wakefulness)
{
P *ego = (P *) ego_;
X(plan_awake)(ego->cld, wakefulness);
}
static void destroy(plan *ego_)
{
P *ego = (P *) ego_;
X(plan_destroy_internal)(ego->cld);
}
static void print(const plan *ego_, printer *p)
{
const P *ego = (const P *) ego_;
p->print(p, "(%se-r2hc-odd-%D%v%(%p%))",
X(rdft_kind_str)(ego->kind), ego->n, ego->vl, ego->cld);
}
static int applicable0(const solver *ego_, const problem *p_)
{
const problem_rdft *p = (const problem_rdft *) p_;
UNUSED(ego_);
return (1
&& p->sz->rnk == 1
&& p->vecsz->rnk <= 1
&& p->sz->dims[0].n % 2 == 1
&& (p->kind[0] == REDFT11 || p->kind[0] == RODFT11)
);
}
static int applicable(const solver *ego, const problem *p, const planner *plnr)
{
return (!NO_SLOWP(plnr) && applicable0(ego, p));
}
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
P *pln;
const problem_rdft *p;
plan *cld;
R *buf;
INT n;
opcnt ops;
static const plan_adt padt = {
X(rdft_solve), awake, print, destroy
};
if (!applicable(ego_, p_, plnr))
return (plan *)0;
p = (const problem_rdft *) p_;
n = p->sz->dims[0].n;
buf = (R *) MALLOC(sizeof(R) * n, BUFFERS);
cld = X(mkplan_d)(plnr, X(mkproblem_rdft_1_d)(X(mktensor_1d)(n, 1, 1),
X(mktensor_0d)(),
buf, buf, R2HC));
X(ifree)(buf);
if (!cld)
return (plan *)0;
pln = MKPLAN_RDFT(P, &padt, p->kind[0]==REDFT11 ? apply_re11:apply_ro11);
pln->n = n;
pln->is = p->sz->dims[0].is;
pln->os = p->sz->dims[0].os;
pln->cld = cld;
pln->kind = p->kind[0];
X(tensor_tornk1)(p->vecsz, &pln->vl, &pln->ivs, &pln->ovs);
X(ops_zero)(&ops);
ops.add = n - 1;
ops.mul = n;
ops.other = 4*n;
X(ops_zero)(&pln->super.super.ops);
X(ops_madd2)(pln->vl, &ops, &pln->super.super.ops);
X(ops_madd2)(pln->vl, &cld->ops, &pln->super.super.ops);
return &(pln->super.super);
}
/* constructor */
static solver *mksolver(void)
{
static const solver_adt sadt = { PROBLEM_RDFT, mkplan, 0 };
S *slv = MKSOLVER(S, &sadt);
return &(slv->super);
}
void X(reodft11e_r2hc_odd_register)(planner *p)
{
REGISTER_SOLVER(p, mksolver());
}

View File

@@ -0,0 +1,294 @@
/*
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
/* Do an R{E,O}DFT11 problem via an R2HC problem, with some
pre/post-processing ala FFTPACK. Use a trick from:
S. C. Chan and K. L. Ho, "Direct methods for computing discrete
sinusoidal transforms," IEE Proceedings F 137 (6), 433--442 (1990).
to re-express as an REDFT01 (DCT-III) problem.
NOTE: We no longer use this algorithm, because it turns out to suffer
a catastrophic loss of accuracy for certain inputs, apparently because
its post-processing multiplies the output by a cosine. Near the zero
of the cosine, the REDFT01 must produce a near-singular output.
*/
#include "reodft/reodft.h"
typedef struct {
solver super;
} S;
typedef struct {
plan_rdft super;
plan *cld;
twid *td, *td2;
INT is, os;
INT n;
INT vl;
INT ivs, ovs;
rdft_kind kind;
} P;
static void apply_re11(const plan *ego_, R *I, R *O)
{
const P *ego = (const P *) ego_;
INT is = ego->is, os = ego->os;
INT i, n = ego->n;
INT iv, vl = ego->vl;
INT ivs = ego->ivs, ovs = ego->ovs;
R *W;
R *buf;
E cur;
buf = (R *) MALLOC(sizeof(R) * n, BUFFERS);
for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) {
/* I wish that this didn't require an extra pass. */
/* FIXME: use recursive/cascade summation for better stability? */
buf[n - 1] = cur = K(2.0) * I[is * (n - 1)];
for (i = n - 1; i > 0; --i) {
E curnew;
buf[(i - 1)] = curnew = K(2.0) * I[is * (i - 1)] - cur;
cur = curnew;
}
W = ego->td->W;
for (i = 1; i < n - i; ++i) {
E a, b, apb, amb, wa, wb;
a = buf[i];
b = buf[n - i];
apb = a + b;
amb = a - b;
wa = W[2*i];
wb = W[2*i + 1];
buf[i] = wa * amb + wb * apb;
buf[n - i] = wa * apb - wb * amb;
}
if (i == n - i) {
buf[i] = K(2.0) * buf[i] * W[2*i];
}
{
plan_rdft *cld = (plan_rdft *) ego->cld;
cld->apply((plan *) cld, buf, buf);
}
W = ego->td2->W;
O[0] = W[0] * buf[0];
for (i = 1; i < n - i; ++i) {
E a, b;
INT k;
a = buf[i];
b = buf[n - i];
k = i + i;
O[os * (k - 1)] = W[k - 1] * (a - b);
O[os * k] = W[k] * (a + b);
}
if (i == n - i) {
O[os * (n - 1)] = W[n - 1] * buf[i];
}
}
X(ifree)(buf);
}
/* like for rodft01, rodft11 is obtained from redft11 by
reversing the input and flipping the sign of every other output. */
static void apply_ro11(const plan *ego_, R *I, R *O)
{
const P *ego = (const P *) ego_;
INT is = ego->is, os = ego->os;
INT i, n = ego->n;
INT iv, vl = ego->vl;
INT ivs = ego->ivs, ovs = ego->ovs;
R *W;
R *buf;
E cur;
buf = (R *) MALLOC(sizeof(R) * n, BUFFERS);
for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) {
/* I wish that this didn't require an extra pass. */
/* FIXME: use recursive/cascade summation for better stability? */
buf[n - 1] = cur = K(2.0) * I[0];
for (i = n - 1; i > 0; --i) {
E curnew;
buf[(i - 1)] = curnew = K(2.0) * I[is * (n - i)] - cur;
cur = curnew;
}
W = ego->td->W;
for (i = 1; i < n - i; ++i) {
E a, b, apb, amb, wa, wb;
a = buf[i];
b = buf[n - i];
apb = a + b;
amb = a - b;
wa = W[2*i];
wb = W[2*i + 1];
buf[i] = wa * amb + wb * apb;
buf[n - i] = wa * apb - wb * amb;
}
if (i == n - i) {
buf[i] = K(2.0) * buf[i] * W[2*i];
}
{
plan_rdft *cld = (plan_rdft *) ego->cld;
cld->apply((plan *) cld, buf, buf);
}
W = ego->td2->W;
O[0] = W[0] * buf[0];
for (i = 1; i < n - i; ++i) {
E a, b;
INT k;
a = buf[i];
b = buf[n - i];
k = i + i;
O[os * (k - 1)] = W[k - 1] * (b - a);
O[os * k] = W[k] * (a + b);
}
if (i == n - i) {
O[os * (n - 1)] = -W[n - 1] * buf[i];
}
}
X(ifree)(buf);
}
static void awake(plan *ego_, enum wakefulness wakefulness)
{
P *ego = (P *) ego_;
static const tw_instr reodft010e_tw[] = {
{ TW_COS, 0, 1 },
{ TW_SIN, 0, 1 },
{ TW_NEXT, 1, 0 }
};
static const tw_instr reodft11e_tw[] = {
{ TW_COS, 1, 1 },
{ TW_NEXT, 2, 0 }
};
X(plan_awake)(ego->cld, wakefulness);
X(twiddle_awake)(wakefulness,
&ego->td, reodft010e_tw, 4*ego->n, 1, ego->n/2+1);
X(twiddle_awake)(wakefulness,
&ego->td2, reodft11e_tw, 8*ego->n, 1, ego->n * 2);
}
static void destroy(plan *ego_)
{
P *ego = (P *) ego_;
X(plan_destroy_internal)(ego->cld);
}
static void print(const plan *ego_, printer *p)
{
const P *ego = (const P *) ego_;
p->print(p, "(%se-r2hc-%D%v%(%p%))",
X(rdft_kind_str)(ego->kind), ego->n, ego->vl, ego->cld);
}
static int applicable0(const solver *ego_, const problem *p_)
{
const problem_rdft *p = (const problem_rdft *) p_;
UNUSED(ego_);
return (1
&& p->sz->rnk == 1
&& p->vecsz->rnk <= 1
&& (p->kind[0] == REDFT11 || p->kind[0] == RODFT11)
);
}
static int applicable(const solver *ego, const problem *p, const planner *plnr)
{
return (!NO_SLOWP(plnr) && applicable0(ego, p));
}
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
P *pln;
const problem_rdft *p;
plan *cld;
R *buf;
INT n;
opcnt ops;
static const plan_adt padt = {
X(rdft_solve), awake, print, destroy
};
if (!applicable(ego_, p_, plnr))
return (plan *)0;
p = (const problem_rdft *) p_;
n = p->sz->dims[0].n;
buf = (R *) MALLOC(sizeof(R) * n, BUFFERS);
cld = X(mkplan_d)(plnr, X(mkproblem_rdft_1_d)(X(mktensor_1d)(n, 1, 1),
X(mktensor_0d)(),
buf, buf, R2HC));
X(ifree)(buf);
if (!cld)
return (plan *)0;
pln = MKPLAN_RDFT(P, &padt, p->kind[0]==REDFT11 ? apply_re11:apply_ro11);
pln->n = n;
pln->is = p->sz->dims[0].is;
pln->os = p->sz->dims[0].os;
pln->cld = cld;
pln->td = pln->td2 = 0;
pln->kind = p->kind[0];
X(tensor_tornk1)(p->vecsz, &pln->vl, &pln->ivs, &pln->ovs);
X(ops_zero)(&ops);
ops.other = 5 + (n-1) * 2 + (n-1)/2 * 12 + (1 - n % 2) * 6;
ops.add = (n - 1) * 1 + (n-1)/2 * 6;
ops.mul = 2 + (n-1) * 1 + (n-1)/2 * 6 + (1 - n % 2) * 3;
X(ops_zero)(&pln->super.super.ops);
X(ops_madd2)(pln->vl, &ops, &pln->super.super.ops);
X(ops_madd2)(pln->vl, &cld->ops, &pln->super.super.ops);
return &(pln->super.super);
}
/* constructor */
static solver *mksolver(void)
{
static const solver_adt sadt = { PROBLEM_RDFT, mkplan, 0 };
S *slv = MKSOLVER(S, &sadt);
return &(slv->super);
}
void X(reodft11e_r2hc_register)(planner *p)
{
REGISTER_SOLVER(p, mksolver());
}

View File

@@ -0,0 +1,513 @@
/*
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
/* Do an R{E,O}DFT11 problem of *even* size by a pair of R2HC problems
of half the size, plus some pre/post-processing. Use a trick from:
Zhongde Wang, "On computing the discrete Fourier and cosine transforms,"
IEEE Trans. Acoust. Speech Sig. Proc. ASSP-33 (4), 1341--1344 (1985).
to re-express as a pair of half-size REDFT01 (DCT-III) problems. Our
implementation looks quite a bit different from the algorithm described
in the paper because we combined the paper's pre/post-processing with
the pre/post-processing used to turn REDFT01 into R2HC. (Also, the
paper uses a DCT/DST pair, but we turn the DST into a DCT via the
usual reordering/sign-flip trick. We additionally combined a couple
of the matrices/transformations of the paper into a single pass.)
NOTE: We originally used a simpler method by S. C. Chan and K. L. Ho
that turned out to have numerical problems; see reodft11e-r2hc.c.
(For odd sizes, see reodft11e-r2hc-odd.c.)
*/
#include "reodft/reodft.h"
typedef struct {
solver super;
} S;
typedef struct {
plan_rdft super;
plan *cld;
twid *td, *td2;
INT is, os;
INT n;
INT vl;
INT ivs, ovs;
rdft_kind kind;
} P;
static void apply_re11(const plan *ego_, R *I, R *O)
{
const P *ego = (const P *) ego_;
INT is = ego->is, os = ego->os;
INT i, n = ego->n, n2 = n/2;
INT iv, vl = ego->vl;
INT ivs = ego->ivs, ovs = ego->ovs;
R *W = ego->td->W;
R *W2;
R *buf;
buf = (R *) MALLOC(sizeof(R) * n, BUFFERS);
for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) {
buf[0] = K(2.0) * I[0];
buf[n2] = K(2.0) * I[is * (n - 1)];
for (i = 1; i + i < n2; ++i) {
INT k = i + i;
E a, b, a2, b2;
{
E u, v;
u = I[is * (k - 1)];
v = I[is * k];
a = u + v;
b2 = u - v;
}
{
E u, v;
u = I[is * (n - k - 1)];
v = I[is * (n - k)];
b = u + v;
a2 = u - v;
}
{
E wa, wb;
wa = W[2*i];
wb = W[2*i + 1];
{
E apb, amb;
apb = a + b;
amb = a - b;
buf[i] = wa * amb + wb * apb;
buf[n2 - i] = wa * apb - wb * amb;
}
{
E apb, amb;
apb = a2 + b2;
amb = a2 - b2;
buf[n2 + i] = wa * amb + wb * apb;
buf[n - i] = wa * apb - wb * amb;
}
}
}
if (i + i == n2) {
E u, v;
u = I[is * (n2 - 1)];
v = I[is * n2];
buf[i] = (u + v) * (W[2*i] * K(2.0));
buf[n - i] = (u - v) * (W[2*i] * K(2.0));
}
/* child plan: two r2hc's of size n/2 */
{
plan_rdft *cld = (plan_rdft *) ego->cld;
cld->apply((plan *) cld, buf, buf);
}
W2 = ego->td2->W;
{ /* i == 0 case */
E wa, wb;
E a, b;
wa = W2[0]; /* cos */
wb = W2[1]; /* sin */
a = buf[0];
b = buf[n2];
O[0] = wa * a + wb * b;
O[os * (n - 1)] = wb * a - wa * b;
}
W2 += 2;
for (i = 1; i + i < n2; ++i, W2 += 2) {
INT k;
E u, v, u2, v2;
u = buf[i];
v = buf[n2 - i];
u2 = buf[n2 + i];
v2 = buf[n - i];
k = (i + i) - 1;
{
E wa, wb;
E a, b;
wa = W2[0]; /* cos */
wb = W2[1]; /* sin */
a = u - v;
b = v2 - u2;
O[os * k] = wa * a + wb * b;
O[os * (n - 1 - k)] = wb * a - wa * b;
}
++k;
W2 += 2;
{
E wa, wb;
E a, b;
wa = W2[0]; /* cos */
wb = W2[1]; /* sin */
a = u + v;
b = u2 + v2;
O[os * k] = wa * a + wb * b;
O[os * (n - 1 - k)] = wb * a - wa * b;
}
}
if (i + i == n2) {
INT k = (i + i) - 1;
E wa, wb;
E a, b;
wa = W2[0]; /* cos */
wb = W2[1]; /* sin */
a = buf[i];
b = buf[n2 + i];
O[os * k] = wa * a - wb * b;
O[os * (n - 1 - k)] = wb * a + wa * b;
}
}
X(ifree)(buf);
}
#if 0
/* This version of apply_re11 uses REDFT01 child plans, more similar
to the original paper by Z. Wang. We keep it around for reference
(it is simpler) and because it may become more efficient if we
ever implement REDFT01 codelets. */
static void apply_re11(const plan *ego_, R *I, R *O)
{
const P *ego = (const P *) ego_;
INT is = ego->is, os = ego->os;
INT i, n = ego->n;
INT iv, vl = ego->vl;
INT ivs = ego->ivs, ovs = ego->ovs;
R *W;
R *buf;
buf = (R *) MALLOC(sizeof(R) * n, BUFFERS);
for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) {
buf[0] = K(2.0) * I[0];
buf[n/2] = K(2.0) * I[is * (n - 1)];
for (i = 1; i + i < n; ++i) {
INT k = i + i;
E a, b;
a = I[is * (k - 1)];
b = I[is * k];
buf[i] = a + b;
buf[n - i] = a - b;
}
/* child plan: two redft01's (DCT-III) */
{
plan_rdft *cld = (plan_rdft *) ego->cld;
cld->apply((plan *) cld, buf, buf);
}
W = ego->td2->W;
for (i = 0; i + 1 < n/2; ++i, W += 2) {
{
E wa, wb;
E a, b;
wa = W[0]; /* cos */
wb = W[1]; /* sin */
a = buf[i];
b = buf[n/2 + i];
O[os * i] = wa * a + wb * b;
O[os * (n - 1 - i)] = wb * a - wa * b;
}
++i;
W += 2;
{
E wa, wb;
E a, b;
wa = W[0]; /* cos */
wb = W[1]; /* sin */
a = buf[i];
b = buf[n/2 + i];
O[os * i] = wa * a - wb * b;
O[os * (n - 1 - i)] = wb * a + wa * b;
}
}
if (i < n/2) {
E wa, wb;
E a, b;
wa = W[0]; /* cos */
wb = W[1]; /* sin */
a = buf[i];
b = buf[n/2 + i];
O[os * i] = wa * a + wb * b;
O[os * (n - 1 - i)] = wb * a - wa * b;
}
}
X(ifree)(buf);
}
#endif /* 0 */
/* like for rodft01, rodft11 is obtained from redft11 by
reversing the input and flipping the sign of every other output. */
static void apply_ro11(const plan *ego_, R *I, R *O)
{
const P *ego = (const P *) ego_;
INT is = ego->is, os = ego->os;
INT i, n = ego->n, n2 = n/2;
INT iv, vl = ego->vl;
INT ivs = ego->ivs, ovs = ego->ovs;
R *W = ego->td->W;
R *W2;
R *buf;
buf = (R *) MALLOC(sizeof(R) * n, BUFFERS);
for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) {
buf[0] = K(2.0) * I[is * (n - 1)];
buf[n2] = K(2.0) * I[0];
for (i = 1; i + i < n2; ++i) {
INT k = i + i;
E a, b, a2, b2;
{
E u, v;
u = I[is * (n - k)];
v = I[is * (n - 1 - k)];
a = u + v;
b2 = u - v;
}
{
E u, v;
u = I[is * (k)];
v = I[is * (k - 1)];
b = u + v;
a2 = u - v;
}
{
E wa, wb;
wa = W[2*i];
wb = W[2*i + 1];
{
E apb, amb;
apb = a + b;
amb = a - b;
buf[i] = wa * amb + wb * apb;
buf[n2 - i] = wa * apb - wb * amb;
}
{
E apb, amb;
apb = a2 + b2;
amb = a2 - b2;
buf[n2 + i] = wa * amb + wb * apb;
buf[n - i] = wa * apb - wb * amb;
}
}
}
if (i + i == n2) {
E u, v;
u = I[is * n2];
v = I[is * (n2 - 1)];
buf[i] = (u + v) * (W[2*i] * K(2.0));
buf[n - i] = (u - v) * (W[2*i] * K(2.0));
}
/* child plan: two r2hc's of size n/2 */
{
plan_rdft *cld = (plan_rdft *) ego->cld;
cld->apply((plan *) cld, buf, buf);
}
W2 = ego->td2->W;
{ /* i == 0 case */
E wa, wb;
E a, b;
wa = W2[0]; /* cos */
wb = W2[1]; /* sin */
a = buf[0];
b = buf[n2];
O[0] = wa * a + wb * b;
O[os * (n - 1)] = wa * b - wb * a;
}
W2 += 2;
for (i = 1; i + i < n2; ++i, W2 += 2) {
INT k;
E u, v, u2, v2;
u = buf[i];
v = buf[n2 - i];
u2 = buf[n2 + i];
v2 = buf[n - i];
k = (i + i) - 1;
{
E wa, wb;
E a, b;
wa = W2[0]; /* cos */
wb = W2[1]; /* sin */
a = v - u;
b = u2 - v2;
O[os * k] = wa * a + wb * b;
O[os * (n - 1 - k)] = wa * b - wb * a;
}
++k;
W2 += 2;
{
E wa, wb;
E a, b;
wa = W2[0]; /* cos */
wb = W2[1]; /* sin */
a = u + v;
b = u2 + v2;
O[os * k] = wa * a + wb * b;
O[os * (n - 1 - k)] = wa * b - wb * a;
}
}
if (i + i == n2) {
INT k = (i + i) - 1;
E wa, wb;
E a, b;
wa = W2[0]; /* cos */
wb = W2[1]; /* sin */
a = buf[i];
b = buf[n2 + i];
O[os * k] = wb * b - wa * a;
O[os * (n - 1 - k)] = wa * b + wb * a;
}
}
X(ifree)(buf);
}
static void awake(plan *ego_, enum wakefulness wakefulness)
{
P *ego = (P *) ego_;
static const tw_instr reodft010e_tw[] = {
{ TW_COS, 0, 1 },
{ TW_SIN, 0, 1 },
{ TW_NEXT, 1, 0 }
};
static const tw_instr reodft11e_tw[] = {
{ TW_COS, 1, 1 },
{ TW_SIN, 1, 1 },
{ TW_NEXT, 2, 0 }
};
X(plan_awake)(ego->cld, wakefulness);
X(twiddle_awake)(wakefulness, &ego->td, reodft010e_tw,
2*ego->n, 1, ego->n/4+1);
X(twiddle_awake)(wakefulness, &ego->td2, reodft11e_tw,
8*ego->n, 1, ego->n);
}
static void destroy(plan *ego_)
{
P *ego = (P *) ego_;
X(plan_destroy_internal)(ego->cld);
}
static void print(const plan *ego_, printer *p)
{
const P *ego = (const P *) ego_;
p->print(p, "(%se-radix2-r2hc-%D%v%(%p%))",
X(rdft_kind_str)(ego->kind), ego->n, ego->vl, ego->cld);
}
static int applicable0(const solver *ego_, const problem *p_)
{
const problem_rdft *p = (const problem_rdft *) p_;
UNUSED(ego_);
return (1
&& p->sz->rnk == 1
&& p->vecsz->rnk <= 1
&& p->sz->dims[0].n % 2 == 0
&& (p->kind[0] == REDFT11 || p->kind[0] == RODFT11)
);
}
static int applicable(const solver *ego, const problem *p, const planner *plnr)
{
return (!NO_SLOWP(plnr) && applicable0(ego, p));
}
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
P *pln;
const problem_rdft *p;
plan *cld;
R *buf;
INT n;
opcnt ops;
static const plan_adt padt = {
X(rdft_solve), awake, print, destroy
};
if (!applicable(ego_, p_, plnr))
return (plan *)0;
p = (const problem_rdft *) p_;
n = p->sz->dims[0].n;
buf = (R *) MALLOC(sizeof(R) * n, BUFFERS);
cld = X(mkplan_d)(plnr, X(mkproblem_rdft_1_d)(X(mktensor_1d)(n/2, 1, 1),
X(mktensor_1d)(2, n/2, n/2),
buf, buf, R2HC));
X(ifree)(buf);
if (!cld)
return (plan *)0;
pln = MKPLAN_RDFT(P, &padt, p->kind[0]==REDFT11 ? apply_re11:apply_ro11);
pln->n = n;
pln->is = p->sz->dims[0].is;
pln->os = p->sz->dims[0].os;
pln->cld = cld;
pln->td = pln->td2 = 0;
pln->kind = p->kind[0];
X(tensor_tornk1)(p->vecsz, &pln->vl, &pln->ivs, &pln->ovs);
X(ops_zero)(&ops);
ops.add = 2 + (n/2 - 1)/2 * 20;
ops.mul = 6 + (n/2 - 1)/2 * 16;
ops.other = 4*n + 2 + (n/2 - 1)/2 * 6;
if ((n/2) % 2 == 0) {
ops.add += 4;
ops.mul += 8;
ops.other += 4;
}
X(ops_zero)(&pln->super.super.ops);
X(ops_madd2)(pln->vl, &ops, &pln->super.super.ops);
X(ops_madd2)(pln->vl, &cld->ops, &pln->super.super.ops);
return &(pln->super.super);
}
/* constructor */
static solver *mksolver(void)
{
static const solver_adt sadt = { PROBLEM_RDFT, mkplan, 0 };
S *slv = MKSOLVER(S, &sadt);
return &(slv->super);
}
void X(reodft11e_radix2_r2hc_register)(planner *p)
{
REGISTER_SOLVER(p, mksolver());
}

View File

@@ -0,0 +1,195 @@
/*
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
/* Do a RODFT00 problem via an R2HC problem, padded antisymmetrically to
twice the size. This is asymptotically a factor of ~2 worse than
rodft00e-r2hc.c (the algorithm used in e.g. FFTPACK and Numerical
Recipes), but we abandoned the latter after we discovered that it
has intrinsic accuracy problems. */
#include "reodft/reodft.h"
typedef struct {
solver super;
} S;
typedef struct {
plan_rdft super;
plan *cld, *cldcpy;
INT is;
INT n;
INT vl;
INT ivs, ovs;
} P;
static void apply(const plan *ego_, R *I, R *O)
{
const P *ego = (const P *) ego_;
INT is = ego->is;
INT i, n = ego->n;
INT iv, vl = ego->vl;
INT ivs = ego->ivs, ovs = ego->ovs;
R *buf;
buf = (R *) MALLOC(sizeof(R) * (2*n), BUFFERS);
for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) {
buf[0] = K(0.0);
for (i = 1; i < n; ++i) {
R a = I[(i-1) * is];
buf[i] = -a;
buf[2*n - i] = a;
}
buf[i] = K(0.0); /* i == n, Nyquist */
/* r2hc transform of size 2*n */
{
plan_rdft *cld = (plan_rdft *) ego->cld;
cld->apply((plan *) cld, buf, buf);
}
/* copy n-1 real numbers (imag. parts of hc array) from buf to O */
{
plan_rdft *cldcpy = (plan_rdft *) ego->cldcpy;
cldcpy->apply((plan *) cldcpy, buf+2*n-1, O);
}
}
X(ifree)(buf);
}
static void awake(plan *ego_, enum wakefulness wakefulness)
{
P *ego = (P *) ego_;
X(plan_awake)(ego->cld, wakefulness);
X(plan_awake)(ego->cldcpy, wakefulness);
}
static void destroy(plan *ego_)
{
P *ego = (P *) ego_;
X(plan_destroy_internal)(ego->cldcpy);
X(plan_destroy_internal)(ego->cld);
}
static void print(const plan *ego_, printer *p)
{
const P *ego = (const P *) ego_;
p->print(p, "(rodft00e-r2hc-pad-%D%v%(%p%)%(%p%))",
ego->n - 1, ego->vl, ego->cld, ego->cldcpy);
}
static int applicable0(const solver *ego_, const problem *p_)
{
const problem_rdft *p = (const problem_rdft *) p_;
UNUSED(ego_);
return (1
&& p->sz->rnk == 1
&& p->vecsz->rnk <= 1
&& p->kind[0] == RODFT00
);
}
static int applicable(const solver *ego, const problem *p, const planner *plnr)
{
return (!NO_SLOWP(plnr) && applicable0(ego, p));
}
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
P *pln;
const problem_rdft *p;
plan *cld = (plan *) 0, *cldcpy;
R *buf = (R *) 0;
INT n;
INT vl, ivs, ovs;
opcnt ops;
static const plan_adt padt = {
X(rdft_solve), awake, print, destroy
};
if (!applicable(ego_, p_, plnr))
goto nada;
p = (const problem_rdft *) p_;
n = p->sz->dims[0].n + 1;
A(n > 0);
buf = (R *) MALLOC(sizeof(R) * (2*n), BUFFERS);
cld = X(mkplan_d)(plnr,X(mkproblem_rdft_1_d)(X(mktensor_1d)(2*n,1,1),
X(mktensor_0d)(),
buf, buf, R2HC));
if (!cld)
goto nada;
X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs);
cldcpy =
X(mkplan_d)(plnr,
X(mkproblem_rdft_1_d)(X(mktensor_0d)(),
X(mktensor_1d)(n-1,-1,
p->sz->dims[0].os),
buf+2*n-1,TAINT(p->O, ovs), R2HC));
if (!cldcpy)
goto nada;
X(ifree)(buf);
pln = MKPLAN_RDFT(P, &padt, apply);
pln->n = n;
pln->is = p->sz->dims[0].is;
pln->cld = cld;
pln->cldcpy = cldcpy;
pln->vl = vl;
pln->ivs = ivs;
pln->ovs = ovs;
X(ops_zero)(&ops);
ops.other = n-1 + 2*n; /* loads + stores (input -> buf) */
X(ops_zero)(&pln->super.super.ops);
X(ops_madd2)(pln->vl, &ops, &pln->super.super.ops);
X(ops_madd2)(pln->vl, &cld->ops, &pln->super.super.ops);
X(ops_madd2)(pln->vl, &cldcpy->ops, &pln->super.super.ops);
return &(pln->super.super);
nada:
X(ifree0)(buf);
if (cld)
X(plan_destroy_internal)(cld);
return (plan *)0;
}
/* constructor */
static solver *mksolver(void)
{
static const solver_adt sadt = { PROBLEM_RDFT, mkplan, 0 };
S *slv = MKSOLVER(S, &sadt);
return &(slv->super);
}
void X(rodft00e_r2hc_pad_register)(planner *p)
{
REGISTER_SOLVER(p, mksolver());
}

View File

@@ -0,0 +1,211 @@
/*
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
/* Do a RODFT00 problem via an R2HC problem, with some pre/post-processing.
This code uses the trick from FFTPACK, also documented in a similar
form by Numerical Recipes. Unfortunately, this algorithm seems to
have intrinsic numerical problems (similar to those in
reodft11e-r2hc.c), possibly due to the fact that it multiplies its
input by a sine, causing a loss of precision near the zero. For
transforms of 16k points, it has already lost three or four decimal
places of accuracy, which we deem unacceptable.
So, we have abandoned this algorithm in favor of the one in
rodft00-r2hc-pad.c, which unfortunately sacrifices 30-50% in speed.
The only other alternative in the literature that does not have
similar numerical difficulties seems to be the direct adaptation of
the Cooley-Tukey decomposition for antisymmetric data, but this
would require a whole new set of codelets and it's not clear that
it's worth it at this point. However, we did implement the latter
algorithm for the specific case of odd n (logically adapting the
split-radix algorithm); see reodft00e-splitradix.c. */
#include "reodft/reodft.h"
typedef struct {
solver super;
} S;
typedef struct {
plan_rdft super;
plan *cld;
twid *td;
INT is, os;
INT n;
INT vl;
INT ivs, ovs;
} P;
static void apply(const plan *ego_, R *I, R *O)
{
const P *ego = (const P *) ego_;
INT is = ego->is, os = ego->os;
INT i, n = ego->n;
INT iv, vl = ego->vl;
INT ivs = ego->ivs, ovs = ego->ovs;
R *W = ego->td->W;
R *buf;
buf = (R *) MALLOC(sizeof(R) * n, BUFFERS);
for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) {
buf[0] = 0;
for (i = 1; i < n - i; ++i) {
E a, b, apb, amb;
a = I[is * (i - 1)];
b = I[is * ((n - i) - 1)];
apb = K(2.0) * W[i] * (a + b);
amb = (a - b);
buf[i] = apb + amb;
buf[n - i] = apb - amb;
}
if (i == n - i) {
buf[i] = K(4.0) * I[is * (i - 1)];
}
{
plan_rdft *cld = (plan_rdft *) ego->cld;
cld->apply((plan *) cld, buf, buf);
}
/* FIXME: use recursive/cascade summation for better stability? */
O[0] = buf[0] * 0.5;
for (i = 1; i + i < n - 1; ++i) {
INT k = i + i;
O[os * (k - 1)] = -buf[n - i];
O[os * k] = O[os * (k - 2)] + buf[i];
}
if (i + i == n - 1) {
O[os * (n - 2)] = -buf[n - i];
}
}
X(ifree)(buf);
}
static void awake(plan *ego_, enum wakefulness wakefulness)
{
P *ego = (P *) ego_;
static const tw_instr rodft00e_tw[] = {
{ TW_SIN, 0, 1 },
{ TW_NEXT, 1, 0 }
};
X(plan_awake)(ego->cld, wakefulness);
X(twiddle_awake)(wakefulness,
&ego->td, rodft00e_tw, 2*ego->n, 1, (ego->n+1)/2);
}
static void destroy(plan *ego_)
{
P *ego = (P *) ego_;
X(plan_destroy_internal)(ego->cld);
}
static void print(const plan *ego_, printer *p)
{
const P *ego = (const P *) ego_;
p->print(p, "(rodft00e-r2hc-%D%v%(%p%))", ego->n - 1, ego->vl, ego->cld);
}
static int applicable0(const solver *ego_, const problem *p_)
{
const problem_rdft *p = (const problem_rdft *) p_;
UNUSED(ego_);
return (1
&& p->sz->rnk == 1
&& p->vecsz->rnk <= 1
&& p->kind[0] == RODFT00
);
}
static int applicable(const solver *ego, const problem *p, const planner *plnr)
{
return (!NO_SLOWP(plnr) && applicable0(ego, p));
}
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
P *pln;
const problem_rdft *p;
plan *cld;
R *buf;
INT n;
opcnt ops;
static const plan_adt padt = {
X(rdft_solve), awake, print, destroy
};
if (!applicable(ego_, p_, plnr))
return (plan *)0;
p = (const problem_rdft *) p_;
n = p->sz->dims[0].n + 1;
buf = (R *) MALLOC(sizeof(R) * n, BUFFERS);
cld = X(mkplan_d)(plnr, X(mkproblem_rdft_1_d)(X(mktensor_1d)(n, 1, 1),
X(mktensor_0d)(),
buf, buf, R2HC));
X(ifree)(buf);
if (!cld)
return (plan *)0;
pln = MKPLAN_RDFT(P, &padt, apply);
pln->n = n;
pln->is = p->sz->dims[0].is;
pln->os = p->sz->dims[0].os;
pln->cld = cld;
pln->td = 0;
X(tensor_tornk1)(p->vecsz, &pln->vl, &pln->ivs, &pln->ovs);
X(ops_zero)(&ops);
ops.other = 4 + (n-1)/2 * 5 + (n-2)/2 * 5;
ops.add = (n-1)/2 * 4 + (n-2)/2 * 1;
ops.mul = 1 + (n-1)/2 * 2;
if (n % 2 == 0)
ops.mul += 1;
X(ops_zero)(&pln->super.super.ops);
X(ops_madd2)(pln->vl, &ops, &pln->super.super.ops);
X(ops_madd2)(pln->vl, &cld->ops, &pln->super.super.ops);
return &(pln->super.super);
}
/* constructor */
static solver *mksolver(void)
{
static const solver_adt sadt = { PROBLEM_RDFT, mkplan, 0 };
S *slv = MKSOLVER(S, &sadt);
return &(slv->super);
}
void X(rodft00e_r2hc_register)(planner *p)
{
REGISTER_SOLVER(p, mksolver());
}