Updates
This commit is contained in:
25
fftw-3.3.10/genfft/Makefile.am
Normal file
25
fftw-3.3.10/genfft/Makefile.am
Normal file
@@ -0,0 +1,25 @@
|
||||
# this makefile requires GNU make.
|
||||
|
||||
EXTRA_DIST = algsimp.ml annotate.ml assoctable.ml c.ml complex.ml \
|
||||
conv.ml dag.ml expr.ml fft.ml gen_hc2c.ml gen_hc2cdft.ml \
|
||||
gen_hc2cdft_c.ml gen_hc2hc.ml gen_r2cb.ml gen_mdct.ml gen_notw.ml \
|
||||
gen_notw_c.ml gen_r2cf.ml gen_r2r.ml gen_twiddle.ml gen_twiddle_c.ml \
|
||||
gen_twidsq.ml gen_twidsq_c.ml genutil.ml littlesimp.ml magic.ml \
|
||||
monads.ml number.ml oracle.ml schedule.ml simd.ml simdmagic.ml \
|
||||
to_alist.ml trig.ml twiddle.ml unique.ml util.ml variable.ml \
|
||||
algsimp.mli annotate.mli assoctable.mli c.mli complex.mli conv.mli \
|
||||
dag.mli expr.mli fft.mli littlesimp.mli number.mli oracle.mli \
|
||||
schedule.mli simd.mli to_alist.mli trig.mli twiddle.mli unique.mli \
|
||||
util.mli variable.mli
|
||||
|
||||
GENFFT_NATIVE=gen_notw.native gen_notw_c.native gen_twiddle.native \
|
||||
gen_twiddle_c.native gen_twidsq.native gen_twidsq_c.native \
|
||||
gen_r2r.native gen_r2cf.native gen_r2cb.native gen_hc2c.native \
|
||||
gen_hc2cdft.native gen_hc2cdft_c.native gen_hc2hc.native \
|
||||
gen_mdct.native
|
||||
|
||||
all-local::
|
||||
$(OCAMLBUILD) -classic-display -libs unix,nums $(GENFFT_NATIVE)
|
||||
|
||||
maintainer-clean-local::
|
||||
$(OCAMLBUILD) -classic-display -clean
|
||||
509
fftw-3.3.10/genfft/Makefile.in
Normal file
509
fftw-3.3.10/genfft/Makefile.in
Normal file
@@ -0,0 +1,509 @@
|
||||
# Makefile.in generated by automake 1.16.3 from Makefile.am.
|
||||
# @configure_input@
|
||||
|
||||
# Copyright (C) 1994-2020 Free Software Foundation, Inc.
|
||||
|
||||
# This Makefile.in is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
# with or without modifications, as long as this notice is preserved.
|
||||
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
|
||||
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
|
||||
# PARTICULAR PURPOSE.
|
||||
|
||||
@SET_MAKE@
|
||||
|
||||
# this makefile requires GNU make.
|
||||
VPATH = @srcdir@
|
||||
am__is_gnu_make = { \
|
||||
if test -z '$(MAKELEVEL)'; then \
|
||||
false; \
|
||||
elif test -n '$(MAKE_HOST)'; then \
|
||||
true; \
|
||||
elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
|
||||
true; \
|
||||
else \
|
||||
false; \
|
||||
fi; \
|
||||
}
|
||||
am__make_running_with_option = \
|
||||
case $${target_option-} in \
|
||||
?) ;; \
|
||||
*) echo "am__make_running_with_option: internal error: invalid" \
|
||||
"target option '$${target_option-}' specified" >&2; \
|
||||
exit 1;; \
|
||||
esac; \
|
||||
has_opt=no; \
|
||||
sane_makeflags=$$MAKEFLAGS; \
|
||||
if $(am__is_gnu_make); then \
|
||||
sane_makeflags=$$MFLAGS; \
|
||||
else \
|
||||
case $$MAKEFLAGS in \
|
||||
*\\[\ \ ]*) \
|
||||
bs=\\; \
|
||||
sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
|
||||
| sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \
|
||||
esac; \
|
||||
fi; \
|
||||
skip_next=no; \
|
||||
strip_trailopt () \
|
||||
{ \
|
||||
flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
|
||||
}; \
|
||||
for flg in $$sane_makeflags; do \
|
||||
test $$skip_next = yes && { skip_next=no; continue; }; \
|
||||
case $$flg in \
|
||||
*=*|--*) continue;; \
|
||||
-*I) strip_trailopt 'I'; skip_next=yes;; \
|
||||
-*I?*) strip_trailopt 'I';; \
|
||||
-*O) strip_trailopt 'O'; skip_next=yes;; \
|
||||
-*O?*) strip_trailopt 'O';; \
|
||||
-*l) strip_trailopt 'l'; skip_next=yes;; \
|
||||
-*l?*) strip_trailopt 'l';; \
|
||||
-[dEDm]) skip_next=yes;; \
|
||||
-[JT]) skip_next=yes;; \
|
||||
esac; \
|
||||
case $$flg in \
|
||||
*$$target_option*) has_opt=yes; break;; \
|
||||
esac; \
|
||||
done; \
|
||||
test $$has_opt = yes
|
||||
am__make_dryrun = (target_option=n; $(am__make_running_with_option))
|
||||
am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
|
||||
pkgdatadir = $(datadir)/@PACKAGE@
|
||||
pkgincludedir = $(includedir)/@PACKAGE@
|
||||
pkglibdir = $(libdir)/@PACKAGE@
|
||||
pkglibexecdir = $(libexecdir)/@PACKAGE@
|
||||
am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
|
||||
install_sh_DATA = $(install_sh) -c -m 644
|
||||
install_sh_PROGRAM = $(install_sh) -c
|
||||
install_sh_SCRIPT = $(install_sh) -c
|
||||
INSTALL_HEADER = $(INSTALL_DATA)
|
||||
transform = $(program_transform_name)
|
||||
NORMAL_INSTALL = :
|
||||
PRE_INSTALL = :
|
||||
POST_INSTALL = :
|
||||
NORMAL_UNINSTALL = :
|
||||
PRE_UNINSTALL = :
|
||||
POST_UNINSTALL = :
|
||||
build_triplet = @build@
|
||||
host_triplet = @host@
|
||||
subdir = genfft
|
||||
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
|
||||
am__aclocal_m4_deps = $(top_srcdir)/m4/acx_mpi.m4 \
|
||||
$(top_srcdir)/m4/acx_pthread.m4 \
|
||||
$(top_srcdir)/m4/ax_cc_maxopt.m4 \
|
||||
$(top_srcdir)/m4/ax_check_compiler_flags.m4 \
|
||||
$(top_srcdir)/m4/ax_compiler_vendor.m4 \
|
||||
$(top_srcdir)/m4/ax_gcc_aligns_stack.m4 \
|
||||
$(top_srcdir)/m4/ax_gcc_version.m4 \
|
||||
$(top_srcdir)/m4/ax_openmp.m4 $(top_srcdir)/m4/libtool.m4 \
|
||||
$(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \
|
||||
$(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
|
||||
$(top_srcdir)/configure.ac
|
||||
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
|
||||
$(ACLOCAL_M4)
|
||||
DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON)
|
||||
mkinstalldirs = $(install_sh) -d
|
||||
CONFIG_HEADER = $(top_builddir)/config.h
|
||||
CONFIG_CLEAN_FILES =
|
||||
CONFIG_CLEAN_VPATH_FILES =
|
||||
AM_V_P = $(am__v_P_@AM_V@)
|
||||
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
|
||||
am__v_P_0 = false
|
||||
am__v_P_1 = :
|
||||
AM_V_GEN = $(am__v_GEN_@AM_V@)
|
||||
am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
|
||||
am__v_GEN_0 = @echo " GEN " $@;
|
||||
am__v_GEN_1 =
|
||||
AM_V_at = $(am__v_at_@AM_V@)
|
||||
am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
|
||||
am__v_at_0 = @
|
||||
am__v_at_1 =
|
||||
SOURCES =
|
||||
DIST_SOURCES =
|
||||
am__can_run_installinfo = \
|
||||
case $$AM_UPDATE_INFO_DIR in \
|
||||
n|no|NO) false;; \
|
||||
*) (install-info --version) >/dev/null 2>&1;; \
|
||||
esac
|
||||
am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
|
||||
am__DIST_COMMON = $(srcdir)/Makefile.in
|
||||
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
|
||||
ACLOCAL = @ACLOCAL@
|
||||
ALLOCA = @ALLOCA@
|
||||
ALTIVEC_CFLAGS = @ALTIVEC_CFLAGS@
|
||||
AMTAR = @AMTAR@
|
||||
AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
|
||||
AR = @AR@
|
||||
AS = @AS@
|
||||
AUTOCONF = @AUTOCONF@
|
||||
AUTOHEADER = @AUTOHEADER@
|
||||
AUTOMAKE = @AUTOMAKE@
|
||||
AVX2_CFLAGS = @AVX2_CFLAGS@
|
||||
AVX512_CFLAGS = @AVX512_CFLAGS@
|
||||
AVX_128_FMA_CFLAGS = @AVX_128_FMA_CFLAGS@
|
||||
AVX_CFLAGS = @AVX_CFLAGS@
|
||||
AWK = @AWK@
|
||||
CC = @CC@
|
||||
CCDEPMODE = @CCDEPMODE@
|
||||
CFLAGS = @CFLAGS@
|
||||
CHECK_PL_OPTS = @CHECK_PL_OPTS@
|
||||
CPP = @CPP@
|
||||
CPPFLAGS = @CPPFLAGS@
|
||||
CYGPATH_W = @CYGPATH_W@
|
||||
C_FFTW_R2R_KIND = @C_FFTW_R2R_KIND@
|
||||
C_MPI_FINT = @C_MPI_FINT@
|
||||
DEFS = @DEFS@
|
||||
DEPDIR = @DEPDIR@
|
||||
DLLTOOL = @DLLTOOL@
|
||||
DSYMUTIL = @DSYMUTIL@
|
||||
DUMPBIN = @DUMPBIN@
|
||||
ECHO_C = @ECHO_C@
|
||||
ECHO_N = @ECHO_N@
|
||||
ECHO_T = @ECHO_T@
|
||||
EGREP = @EGREP@
|
||||
EXEEXT = @EXEEXT@
|
||||
F77 = @F77@
|
||||
FFLAGS = @FFLAGS@
|
||||
FGREP = @FGREP@
|
||||
FLIBS = @FLIBS@
|
||||
GREP = @GREP@
|
||||
INDENT = @INDENT@
|
||||
INSTALL = @INSTALL@
|
||||
INSTALL_DATA = @INSTALL_DATA@
|
||||
INSTALL_PROGRAM = @INSTALL_PROGRAM@
|
||||
INSTALL_SCRIPT = @INSTALL_SCRIPT@
|
||||
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
|
||||
KCVI_CFLAGS = @KCVI_CFLAGS@
|
||||
LD = @LD@
|
||||
LDFLAGS = @LDFLAGS@
|
||||
LIBOBJS = @LIBOBJS@
|
||||
LIBQUADMATH = @LIBQUADMATH@
|
||||
LIBS = @LIBS@
|
||||
LIBTOOL = @LIBTOOL@
|
||||
LIPO = @LIPO@
|
||||
LN_S = @LN_S@
|
||||
LTLIBOBJS = @LTLIBOBJS@
|
||||
LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@
|
||||
MAINT = @MAINT@
|
||||
MAKEINFO = @MAKEINFO@
|
||||
MANIFEST_TOOL = @MANIFEST_TOOL@
|
||||
MKDIR_P = @MKDIR_P@
|
||||
MPICC = @MPICC@
|
||||
MPILIBS = @MPILIBS@
|
||||
MPIRUN = @MPIRUN@
|
||||
NEON_CFLAGS = @NEON_CFLAGS@
|
||||
NM = @NM@
|
||||
NMEDIT = @NMEDIT@
|
||||
OBJDUMP = @OBJDUMP@
|
||||
OBJEXT = @OBJEXT@
|
||||
OCAMLBUILD = @OCAMLBUILD@
|
||||
OPENMP_CFLAGS = @OPENMP_CFLAGS@
|
||||
OTOOL = @OTOOL@
|
||||
OTOOL64 = @OTOOL64@
|
||||
PACKAGE = @PACKAGE@
|
||||
PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
|
||||
PACKAGE_NAME = @PACKAGE_NAME@
|
||||
PACKAGE_STRING = @PACKAGE_STRING@
|
||||
PACKAGE_TARNAME = @PACKAGE_TARNAME@
|
||||
PACKAGE_URL = @PACKAGE_URL@
|
||||
PACKAGE_VERSION = @PACKAGE_VERSION@
|
||||
PATH_SEPARATOR = @PATH_SEPARATOR@
|
||||
POW_LIB = @POW_LIB@
|
||||
PRECISION = @PRECISION@
|
||||
PREC_SUFFIX = @PREC_SUFFIX@
|
||||
PTHREAD_CC = @PTHREAD_CC@
|
||||
PTHREAD_CFLAGS = @PTHREAD_CFLAGS@
|
||||
PTHREAD_LIBS = @PTHREAD_LIBS@
|
||||
RANLIB = @RANLIB@
|
||||
SED = @SED@
|
||||
SET_MAKE = @SET_MAKE@
|
||||
SHARED_VERSION_INFO = @SHARED_VERSION_INFO@
|
||||
SHELL = @SHELL@
|
||||
SSE2_CFLAGS = @SSE2_CFLAGS@
|
||||
STACK_ALIGN_CFLAGS = @STACK_ALIGN_CFLAGS@
|
||||
STRIP = @STRIP@
|
||||
THREADLIBS = @THREADLIBS@
|
||||
VERSION = @VERSION@
|
||||
VSX_CFLAGS = @VSX_CFLAGS@
|
||||
abs_builddir = @abs_builddir@
|
||||
abs_srcdir = @abs_srcdir@
|
||||
abs_top_builddir = @abs_top_builddir@
|
||||
abs_top_srcdir = @abs_top_srcdir@
|
||||
ac_ct_AR = @ac_ct_AR@
|
||||
ac_ct_CC = @ac_ct_CC@
|
||||
ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
|
||||
ac_ct_F77 = @ac_ct_F77@
|
||||
acx_pthread_config = @acx_pthread_config@
|
||||
am__include = @am__include@
|
||||
am__leading_dot = @am__leading_dot@
|
||||
am__quote = @am__quote@
|
||||
am__tar = @am__tar@
|
||||
am__untar = @am__untar@
|
||||
bindir = @bindir@
|
||||
build = @build@
|
||||
build_alias = @build_alias@
|
||||
build_cpu = @build_cpu@
|
||||
build_os = @build_os@
|
||||
build_vendor = @build_vendor@
|
||||
builddir = @builddir@
|
||||
datadir = @datadir@
|
||||
datarootdir = @datarootdir@
|
||||
docdir = @docdir@
|
||||
dvidir = @dvidir@
|
||||
exec_prefix = @exec_prefix@
|
||||
host = @host@
|
||||
host_alias = @host_alias@
|
||||
host_cpu = @host_cpu@
|
||||
host_os = @host_os@
|
||||
host_vendor = @host_vendor@
|
||||
htmldir = @htmldir@
|
||||
includedir = @includedir@
|
||||
infodir = @infodir@
|
||||
install_sh = @install_sh@
|
||||
libdir = @libdir@
|
||||
libexecdir = @libexecdir@
|
||||
localedir = @localedir@
|
||||
localstatedir = @localstatedir@
|
||||
mandir = @mandir@
|
||||
mkdir_p = @mkdir_p@
|
||||
oldincludedir = @oldincludedir@
|
||||
pdfdir = @pdfdir@
|
||||
prefix = @prefix@
|
||||
program_transform_name = @program_transform_name@
|
||||
psdir = @psdir@
|
||||
runstatedir = @runstatedir@
|
||||
sbindir = @sbindir@
|
||||
sharedstatedir = @sharedstatedir@
|
||||
srcdir = @srcdir@
|
||||
sysconfdir = @sysconfdir@
|
||||
target_alias = @target_alias@
|
||||
top_build_prefix = @top_build_prefix@
|
||||
top_builddir = @top_builddir@
|
||||
top_srcdir = @top_srcdir@
|
||||
EXTRA_DIST = algsimp.ml annotate.ml assoctable.ml c.ml complex.ml \
|
||||
conv.ml dag.ml expr.ml fft.ml gen_hc2c.ml gen_hc2cdft.ml \
|
||||
gen_hc2cdft_c.ml gen_hc2hc.ml gen_r2cb.ml gen_mdct.ml gen_notw.ml \
|
||||
gen_notw_c.ml gen_r2cf.ml gen_r2r.ml gen_twiddle.ml gen_twiddle_c.ml \
|
||||
gen_twidsq.ml gen_twidsq_c.ml genutil.ml littlesimp.ml magic.ml \
|
||||
monads.ml number.ml oracle.ml schedule.ml simd.ml simdmagic.ml \
|
||||
to_alist.ml trig.ml twiddle.ml unique.ml util.ml variable.ml \
|
||||
algsimp.mli annotate.mli assoctable.mli c.mli complex.mli conv.mli \
|
||||
dag.mli expr.mli fft.mli littlesimp.mli number.mli oracle.mli \
|
||||
schedule.mli simd.mli to_alist.mli trig.mli twiddle.mli unique.mli \
|
||||
util.mli variable.mli
|
||||
|
||||
GENFFT_NATIVE = gen_notw.native gen_notw_c.native gen_twiddle.native \
|
||||
gen_twiddle_c.native gen_twidsq.native gen_twidsq_c.native \
|
||||
gen_r2r.native gen_r2cf.native gen_r2cb.native gen_hc2c.native \
|
||||
gen_hc2cdft.native gen_hc2cdft_c.native gen_hc2hc.native \
|
||||
gen_mdct.native
|
||||
|
||||
all: all-am
|
||||
|
||||
.SUFFIXES:
|
||||
$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
|
||||
@for dep in $?; do \
|
||||
case '$(am__configure_deps)' in \
|
||||
*$$dep*) \
|
||||
( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
|
||||
&& { if test -f $@; then exit 0; else break; fi; }; \
|
||||
exit 1;; \
|
||||
esac; \
|
||||
done; \
|
||||
echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu genfft/Makefile'; \
|
||||
$(am__cd) $(top_srcdir) && \
|
||||
$(AUTOMAKE) --gnu genfft/Makefile
|
||||
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
|
||||
@case '$?' in \
|
||||
*config.status*) \
|
||||
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
|
||||
*) \
|
||||
echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \
|
||||
cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \
|
||||
esac;
|
||||
|
||||
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
|
||||
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
|
||||
|
||||
$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
|
||||
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
|
||||
$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
|
||||
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
|
||||
$(am__aclocal_m4_deps):
|
||||
|
||||
mostlyclean-libtool:
|
||||
-rm -f *.lo
|
||||
|
||||
clean-libtool:
|
||||
-rm -rf .libs _libs
|
||||
tags TAGS:
|
||||
|
||||
ctags CTAGS:
|
||||
|
||||
cscope cscopelist:
|
||||
|
||||
|
||||
distdir: $(BUILT_SOURCES)
|
||||
$(MAKE) $(AM_MAKEFLAGS) distdir-am
|
||||
|
||||
distdir-am: $(DISTFILES)
|
||||
@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
|
||||
topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
|
||||
list='$(DISTFILES)'; \
|
||||
dist_files=`for file in $$list; do echo $$file; done | \
|
||||
sed -e "s|^$$srcdirstrip/||;t" \
|
||||
-e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
|
||||
case $$dist_files in \
|
||||
*/*) $(MKDIR_P) `echo "$$dist_files" | \
|
||||
sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
|
||||
sort -u` ;; \
|
||||
esac; \
|
||||
for file in $$dist_files; do \
|
||||
if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
|
||||
if test -d $$d/$$file; then \
|
||||
dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
|
||||
if test -d "$(distdir)/$$file"; then \
|
||||
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
|
||||
fi; \
|
||||
if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
|
||||
cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
|
||||
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
|
||||
fi; \
|
||||
cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
|
||||
else \
|
||||
test -f "$(distdir)/$$file" \
|
||||
|| cp -p $$d/$$file "$(distdir)/$$file" \
|
||||
|| exit 1; \
|
||||
fi; \
|
||||
done
|
||||
check-am: all-am
|
||||
check: check-am
|
||||
all-am: Makefile all-local
|
||||
installdirs:
|
||||
install: install-am
|
||||
install-exec: install-exec-am
|
||||
install-data: install-data-am
|
||||
uninstall: uninstall-am
|
||||
|
||||
install-am: all-am
|
||||
@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
|
||||
|
||||
installcheck: installcheck-am
|
||||
install-strip:
|
||||
if test -z '$(STRIP)'; then \
|
||||
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
|
||||
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
|
||||
install; \
|
||||
else \
|
||||
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
|
||||
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
|
||||
"INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
|
||||
fi
|
||||
mostlyclean-generic:
|
||||
|
||||
clean-generic:
|
||||
|
||||
distclean-generic:
|
||||
-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
|
||||
-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
|
||||
|
||||
maintainer-clean-generic:
|
||||
@echo "This command is intended for maintainers to use"
|
||||
@echo "it deletes files that may require special tools to rebuild."
|
||||
clean: clean-am
|
||||
|
||||
clean-am: clean-generic clean-libtool mostlyclean-am
|
||||
|
||||
distclean: distclean-am
|
||||
-rm -f Makefile
|
||||
distclean-am: clean-am distclean-generic
|
||||
|
||||
dvi: dvi-am
|
||||
|
||||
dvi-am:
|
||||
|
||||
html: html-am
|
||||
|
||||
html-am:
|
||||
|
||||
info: info-am
|
||||
|
||||
info-am:
|
||||
|
||||
install-data-am:
|
||||
|
||||
install-dvi: install-dvi-am
|
||||
|
||||
install-dvi-am:
|
||||
|
||||
install-exec-am:
|
||||
|
||||
install-html: install-html-am
|
||||
|
||||
install-html-am:
|
||||
|
||||
install-info: install-info-am
|
||||
|
||||
install-info-am:
|
||||
|
||||
install-man:
|
||||
|
||||
install-pdf: install-pdf-am
|
||||
|
||||
install-pdf-am:
|
||||
|
||||
install-ps: install-ps-am
|
||||
|
||||
install-ps-am:
|
||||
|
||||
installcheck-am:
|
||||
|
||||
maintainer-clean: maintainer-clean-am
|
||||
-rm -f Makefile
|
||||
maintainer-clean-am: distclean-am maintainer-clean-generic \
|
||||
maintainer-clean-local
|
||||
|
||||
mostlyclean: mostlyclean-am
|
||||
|
||||
mostlyclean-am: mostlyclean-generic mostlyclean-libtool
|
||||
|
||||
pdf: pdf-am
|
||||
|
||||
pdf-am:
|
||||
|
||||
ps: ps-am
|
||||
|
||||
ps-am:
|
||||
|
||||
uninstall-am:
|
||||
|
||||
.MAKE: install-am install-strip
|
||||
|
||||
.PHONY: all all-am all-local check check-am clean clean-generic \
|
||||
clean-libtool cscopelist-am ctags-am distclean \
|
||||
distclean-generic distclean-libtool distdir dvi dvi-am html \
|
||||
html-am info info-am install install-am install-data \
|
||||
install-data-am install-dvi install-dvi-am install-exec \
|
||||
install-exec-am install-html install-html-am install-info \
|
||||
install-info-am install-man install-pdf install-pdf-am \
|
||||
install-ps install-ps-am install-strip installcheck \
|
||||
installcheck-am installdirs maintainer-clean \
|
||||
maintainer-clean-generic maintainer-clean-local mostlyclean \
|
||||
mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
|
||||
tags-am uninstall uninstall-am
|
||||
|
||||
.PRECIOUS: Makefile
|
||||
|
||||
|
||||
all-local::
|
||||
$(OCAMLBUILD) -classic-display -libs unix,nums $(GENFFT_NATIVE)
|
||||
|
||||
maintainer-clean-local::
|
||||
$(OCAMLBUILD) -classic-display -clean
|
||||
|
||||
# Tell versions [3.59,3.63) of GNU make to not export all variables.
|
||||
# Otherwise a system limit (for SysV at least) may be exceeded.
|
||||
.NOEXPORT:
|
||||
580
fftw-3.3.10/genfft/algsimp.ml
Normal file
580
fftw-3.3.10/genfft/algsimp.ml
Normal file
@@ -0,0 +1,580 @@
|
||||
(*
|
||||
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*)
|
||||
|
||||
|
||||
open Util
|
||||
open Expr
|
||||
|
||||
let node_insert x = Assoctable.insert Expr.hash x
|
||||
let node_lookup x = Assoctable.lookup Expr.hash (==) x
|
||||
|
||||
(*************************************************************
|
||||
* Algebraic simplifier/elimination of common subexpressions
|
||||
*************************************************************)
|
||||
module AlgSimp : sig
|
||||
val algsimp : expr list -> expr list
|
||||
end = struct
|
||||
|
||||
open Monads.StateMonad
|
||||
open Monads.MemoMonad
|
||||
open Assoctable
|
||||
|
||||
let fetchSimp =
|
||||
fetchState >>= fun (s, _) -> returnM s
|
||||
let storeSimp s =
|
||||
fetchState >>= (fun (_, c) -> storeState (s, c))
|
||||
let lookupSimpM key =
|
||||
fetchSimp >>= fun table ->
|
||||
returnM (node_lookup key table)
|
||||
let insertSimpM key value =
|
||||
fetchSimp >>= fun table ->
|
||||
storeSimp (node_insert key value table)
|
||||
|
||||
let subset a b =
|
||||
List.for_all (fun x -> List.exists (fun y -> x == y) b) a
|
||||
|
||||
let structurallyEqualCSE a b =
|
||||
match (a, b) with
|
||||
| (Num a, Num b) -> Number.equal a b
|
||||
| (NaN a, NaN b) -> a == b
|
||||
| (Load a, Load b) -> Variable.same a b
|
||||
| (Times (a, a'), Times (b, b')) ->
|
||||
((a == b) && (a' == b')) ||
|
||||
((a == b') && (a' == b))
|
||||
| (CTimes (a, a'), CTimes (b, b')) ->
|
||||
((a == b) && (a' == b')) ||
|
||||
((a == b') && (a' == b))
|
||||
| (CTimesJ (a, a'), CTimesJ (b, b')) -> ((a == b) && (a' == b'))
|
||||
| (Plus a, Plus b) -> subset a b && subset b a
|
||||
| (Uminus a, Uminus b) -> (a == b)
|
||||
| _ -> false
|
||||
|
||||
let hashCSE x =
|
||||
if (!Magic.randomized_cse) then
|
||||
Oracle.hash x
|
||||
else
|
||||
Expr.hash x
|
||||
|
||||
let equalCSE a b =
|
||||
if (!Magic.randomized_cse) then
|
||||
(structurallyEqualCSE a b || Oracle.likely_equal a b)
|
||||
else
|
||||
structurallyEqualCSE a b
|
||||
|
||||
let fetchCSE =
|
||||
fetchState >>= fun (_, c) -> returnM c
|
||||
let storeCSE c =
|
||||
fetchState >>= (fun (s, _) -> storeState (s, c))
|
||||
let lookupCSEM key =
|
||||
fetchCSE >>= fun table ->
|
||||
returnM (Assoctable.lookup hashCSE equalCSE key table)
|
||||
let insertCSEM key value =
|
||||
fetchCSE >>= fun table ->
|
||||
storeCSE (Assoctable.insert hashCSE key value table)
|
||||
|
||||
(* memoize both x and Uminus x (unless x is already negated) *)
|
||||
let identityM x =
|
||||
let memo x = memoizing lookupCSEM insertCSEM returnM x in
|
||||
match x with
|
||||
Uminus _ -> memo x
|
||||
| _ -> memo x >>= fun x' -> memo (Uminus x') >> returnM x'
|
||||
|
||||
let makeNode = identityM
|
||||
|
||||
(* simplifiers for various kinds of nodes *)
|
||||
let rec snumM = function
|
||||
n when Number.is_zero n ->
|
||||
makeNode (Num (Number.zero))
|
||||
| n when Number.negative n ->
|
||||
makeNode (Num (Number.negate n)) >>= suminusM
|
||||
| n -> makeNode (Num n)
|
||||
|
||||
and suminusM = function
|
||||
Uminus x -> makeNode x
|
||||
| Num a when (Number.is_zero a) -> snumM Number.zero
|
||||
| a -> makeNode (Uminus a)
|
||||
|
||||
and stimesM = function
|
||||
| (Uminus a, b) -> stimesM (a, b) >>= suminusM
|
||||
| (a, Uminus b) -> stimesM (a, b) >>= suminusM
|
||||
| (NaN I, CTimes (a, b)) -> stimesM (NaN I, b) >>=
|
||||
fun ib -> sctimesM (a, ib)
|
||||
| (NaN I, CTimesJ (a, b)) -> stimesM (NaN I, b) >>=
|
||||
fun ib -> sctimesjM (a, ib)
|
||||
| (Num a, Num b) -> snumM (Number.mul a b)
|
||||
| (Num a, Times (Num b, c)) ->
|
||||
snumM (Number.mul a b) >>= fun x -> stimesM (x, c)
|
||||
| (Num a, b) when Number.is_zero a -> snumM Number.zero
|
||||
| (Num a, b) when Number.is_one a -> makeNode b
|
||||
| (Num a, b) when Number.is_mone a -> suminusM b
|
||||
| (a, b) when is_known_constant b && not (is_known_constant a) ->
|
||||
stimesM (b, a)
|
||||
| (a, b) -> makeNode (Times (a, b))
|
||||
|
||||
and sctimesM = function
|
||||
| (Uminus a, b) -> sctimesM (a, b) >>= suminusM
|
||||
| (a, Uminus b) -> sctimesM (a, b) >>= suminusM
|
||||
| (a, b) -> makeNode (CTimes (a, b))
|
||||
|
||||
and sctimesjM = function
|
||||
| (Uminus a, b) -> sctimesjM (a, b) >>= suminusM
|
||||
| (a, Uminus b) -> sctimesjM (a, b) >>= suminusM
|
||||
| (a, b) -> makeNode (CTimesJ (a, b))
|
||||
|
||||
and reduce_sumM x = match x with
|
||||
[] -> returnM []
|
||||
| [Num a] ->
|
||||
if (Number.is_zero a) then
|
||||
returnM []
|
||||
else returnM x
|
||||
| [Uminus (Num a)] ->
|
||||
if (Number.is_zero a) then
|
||||
returnM []
|
||||
else returnM x
|
||||
| (Num a) :: (Num b) :: s ->
|
||||
snumM (Number.add a b) >>= fun x ->
|
||||
reduce_sumM (x :: s)
|
||||
| (Num a) :: (Uminus (Num b)) :: s ->
|
||||
snumM (Number.sub a b) >>= fun x ->
|
||||
reduce_sumM (x :: s)
|
||||
| (Uminus (Num a)) :: (Num b) :: s ->
|
||||
snumM (Number.sub b a) >>= fun x ->
|
||||
reduce_sumM (x :: s)
|
||||
| (Uminus (Num a)) :: (Uminus (Num b)) :: s ->
|
||||
snumM (Number.add a b) >>=
|
||||
suminusM >>= fun x ->
|
||||
reduce_sumM (x :: s)
|
||||
| ((Num _) as a) :: b :: s -> reduce_sumM (b :: a :: s)
|
||||
| ((Uminus (Num _)) as a) :: b :: s -> reduce_sumM (b :: a :: s)
|
||||
| a :: s ->
|
||||
reduce_sumM s >>= fun s' -> returnM (a :: s')
|
||||
|
||||
and collectible1 = function
|
||||
| NaN _ -> false
|
||||
| Uminus x -> collectible1 x
|
||||
| _ -> true
|
||||
and collectible (a, b) = collectible1 a
|
||||
|
||||
(* collect common factors: ax + bx -> (a+b)x *)
|
||||
and collectM which x =
|
||||
let rec findCoeffM which = function
|
||||
| Times (a, b) when collectible (which (a, b)) -> returnM (which (a, b))
|
||||
| Uminus x ->
|
||||
findCoeffM which x >>= fun (coeff, b) ->
|
||||
suminusM coeff >>= fun mcoeff ->
|
||||
returnM (mcoeff, b)
|
||||
| x -> snumM Number.one >>= fun one -> returnM (one, x)
|
||||
and separateM xpr = function
|
||||
[] -> returnM ([], [])
|
||||
| a :: b ->
|
||||
separateM xpr b >>= fun (w, wo) ->
|
||||
(* try first factor *)
|
||||
findCoeffM (fun (a, b) -> (a, b)) a >>= fun (c, x) ->
|
||||
if (xpr == x) && collectible (c, x) then returnM (c :: w, wo)
|
||||
else
|
||||
(* try second factor *)
|
||||
findCoeffM (fun (a, b) -> (b, a)) a >>= fun (c, x) ->
|
||||
if (xpr == x) && collectible (c, x) then returnM (c :: w, wo)
|
||||
else returnM (w, a :: wo)
|
||||
in match x with
|
||||
[] -> returnM x
|
||||
| [a] -> returnM x
|
||||
| a :: b ->
|
||||
findCoeffM which a >>= fun (_, xpr) ->
|
||||
separateM xpr x >>= fun (w, wo) ->
|
||||
collectM which wo >>= fun wo' ->
|
||||
splusM w >>= fun w' ->
|
||||
stimesM (w', xpr) >>= fun t' ->
|
||||
returnM (t':: wo')
|
||||
|
||||
and mangleSumM x = returnM x
|
||||
>>= reduce_sumM
|
||||
>>= collectM (fun (a, b) -> (a, b))
|
||||
>>= collectM (fun (a, b) -> (b, a))
|
||||
>>= reduce_sumM
|
||||
>>= deepCollectM !Magic.deep_collect_depth
|
||||
>>= reduce_sumM
|
||||
|
||||
and reorder_uminus = function (* push all Uminuses to the end *)
|
||||
[] -> []
|
||||
| ((Uminus _) as a' :: b) -> (reorder_uminus b) @ [a']
|
||||
| (a :: b) -> a :: (reorder_uminus b)
|
||||
|
||||
and canonicalizeM = function
|
||||
[] -> snumM Number.zero
|
||||
| [a] -> makeNode a (* one term *)
|
||||
| a -> generateFusedMultAddM (reorder_uminus a)
|
||||
|
||||
and generateFusedMultAddM =
|
||||
let rec is_multiplication = function
|
||||
| Times (Num a, b) -> true
|
||||
| Uminus (Times (Num a, b)) -> true
|
||||
| _ -> false
|
||||
and separate = function
|
||||
[] -> ([], [], Number.zero)
|
||||
| (Times (Num a, b)) as this :: c ->
|
||||
let (x, y, max) = separate c in
|
||||
let newmax = if (Number.greater a max) then a else max in
|
||||
(this :: x, y, newmax)
|
||||
| (Uminus (Times (Num a, b))) as this :: c ->
|
||||
let (x, y, max) = separate c in
|
||||
let newmax = if (Number.greater a max) then a else max in
|
||||
(this :: x, y, newmax)
|
||||
| this :: c ->
|
||||
let (x, y, max) = separate c in
|
||||
(x, this :: y, max)
|
||||
in fun l ->
|
||||
if !Magic.enable_fma && count is_multiplication l >= 2 then
|
||||
let (w, wo, max) = separate l in
|
||||
snumM (Number.div Number.one max) >>= fun invmax' ->
|
||||
snumM max >>= fun max' ->
|
||||
mapM (fun x -> stimesM (invmax', x)) w >>= splusM >>= fun pw' ->
|
||||
stimesM (max', pw') >>= fun mw' ->
|
||||
splusM (wo @ [mw'])
|
||||
else
|
||||
makeNode (Plus l)
|
||||
|
||||
|
||||
and negative = function
|
||||
Uminus _ -> true
|
||||
| _ -> false
|
||||
|
||||
(*
|
||||
* simplify patterns of the form
|
||||
*
|
||||
* ((c_1 * a + ...) + ...) + (c_2 * a + ...)
|
||||
*
|
||||
* The pattern includes arbitrary coefficients and minus signs.
|
||||
* A common case of this pattern is the butterfly
|
||||
* (a + b) + (a - b)
|
||||
* (a + b) - (a - b)
|
||||
*)
|
||||
(* this whole procedure needs much more thought *)
|
||||
and deepCollectM maxdepth l =
|
||||
let rec findTerms depth x = match x with
|
||||
| Uminus x -> findTerms depth x
|
||||
| Times (Num _, b) -> (findTerms (depth - 1) b)
|
||||
| Plus l when depth > 0 ->
|
||||
x :: List.flatten (List.map (findTerms (depth - 1)) l)
|
||||
| x -> [x]
|
||||
and duplicates = function
|
||||
[] -> []
|
||||
| a :: b -> if List.memq a b then a :: duplicates b
|
||||
else duplicates b
|
||||
|
||||
in let rec splitDuplicates depth d x =
|
||||
if (List.memq x d) then
|
||||
snumM (Number.zero) >>= fun zero ->
|
||||
returnM (zero, x)
|
||||
else match x with
|
||||
| Times (a, b) ->
|
||||
splitDuplicates (depth - 1) d a >>= fun (a', xa) ->
|
||||
splitDuplicates (depth - 1) d b >>= fun (b', xb) ->
|
||||
stimesM (a', b') >>= fun ab ->
|
||||
stimesM (a, xb) >>= fun xb' ->
|
||||
stimesM (xa, b) >>= fun xa' ->
|
||||
stimesM (xa, xb) >>= fun xab ->
|
||||
splusM [xa'; xb'; xab] >>= fun x ->
|
||||
returnM (ab, x)
|
||||
| Uminus a ->
|
||||
splitDuplicates depth d a >>= fun (x, y) ->
|
||||
suminusM x >>= fun ux ->
|
||||
suminusM y >>= fun uy ->
|
||||
returnM (ux, uy)
|
||||
| Plus l when depth > 0 ->
|
||||
mapM (splitDuplicates (depth - 1) d) l >>= fun ld ->
|
||||
let (l', d') = List.split ld in
|
||||
splusM l' >>= fun p ->
|
||||
splusM d' >>= fun d'' ->
|
||||
returnM (p, d'')
|
||||
| x ->
|
||||
snumM (Number.zero) >>= fun zero' ->
|
||||
returnM (x, zero')
|
||||
|
||||
in let l' = List.flatten (List.map (findTerms maxdepth) l)
|
||||
in match duplicates l' with
|
||||
| [] -> returnM l
|
||||
| d ->
|
||||
mapM (splitDuplicates maxdepth d) l >>= fun ld ->
|
||||
let (l', d') = List.split ld in
|
||||
splusM l' >>= fun l'' ->
|
||||
let rec flattenPlusM = function
|
||||
| Plus l -> returnM l
|
||||
| Uminus x ->
|
||||
flattenPlusM x >>= mapM suminusM
|
||||
| x -> returnM [x]
|
||||
in
|
||||
mapM flattenPlusM d' >>= fun d'' ->
|
||||
splusM (List.flatten d'') >>= fun d''' ->
|
||||
mangleSumM [l''; d''']
|
||||
|
||||
and splusM l =
|
||||
let fma_heuristics x =
|
||||
if !Magic.enable_fma then
|
||||
match x with
|
||||
| [Uminus (Times _); Times _] -> Some false
|
||||
| [Times _; Uminus (Times _)] -> Some false
|
||||
| [Uminus (_); Times _] -> Some true
|
||||
| [Times _; Uminus (Plus _)] -> Some true
|
||||
| [_; Uminus (Times _)] -> Some false
|
||||
| [Uminus (Times _); _] -> Some false
|
||||
| _ -> None
|
||||
else
|
||||
None
|
||||
in
|
||||
mangleSumM l >>= fun l' ->
|
||||
(* no terms are negative. Don't do anything *)
|
||||
if not (List.exists negative l') then
|
||||
canonicalizeM l'
|
||||
(* all terms are negative. Negate them all and collect the minus sign *)
|
||||
else if List.for_all negative l' then
|
||||
mapM suminusM l' >>= splusM >>= suminusM
|
||||
else match fma_heuristics l' with
|
||||
| Some true -> mapM suminusM l' >>= splusM >>= suminusM
|
||||
| Some false -> canonicalizeM l'
|
||||
| None ->
|
||||
(* Ask the Oracle for the canonical form *)
|
||||
if (not !Magic.randomized_cse) &&
|
||||
Oracle.should_flip_sign (Plus l') then
|
||||
mapM suminusM l' >>= splusM >>= suminusM
|
||||
else
|
||||
canonicalizeM l'
|
||||
|
||||
(* monadic style algebraic simplifier for the dag *)
|
||||
let rec algsimpM x =
|
||||
memoizing lookupSimpM insertSimpM
|
||||
(function
|
||||
| Num a -> snumM a
|
||||
| NaN _ as x -> makeNode x
|
||||
| Plus a ->
|
||||
mapM algsimpM a >>= splusM
|
||||
| Times (a, b) ->
|
||||
(algsimpM a >>= fun a' ->
|
||||
algsimpM b >>= fun b' ->
|
||||
stimesM (a', b'))
|
||||
| CTimes (a, b) ->
|
||||
(algsimpM a >>= fun a' ->
|
||||
algsimpM b >>= fun b' ->
|
||||
sctimesM (a', b'))
|
||||
| CTimesJ (a, b) ->
|
||||
(algsimpM a >>= fun a' ->
|
||||
algsimpM b >>= fun b' ->
|
||||
sctimesjM (a', b'))
|
||||
| Uminus a ->
|
||||
algsimpM a >>= suminusM
|
||||
| Store (v, a) ->
|
||||
algsimpM a >>= fun a' ->
|
||||
makeNode (Store (v, a'))
|
||||
| Load _ as x -> makeNode x)
|
||||
x
|
||||
|
||||
let initialTable = (empty, empty)
|
||||
let simp_roots = mapM algsimpM
|
||||
let algsimp = runM initialTable simp_roots
|
||||
end
|
||||
|
||||
(*************************************************************
|
||||
* Network transposition algorithm
|
||||
*************************************************************)
|
||||
module Transpose = struct
|
||||
open Monads.StateMonad
|
||||
open Monads.MemoMonad
|
||||
open Littlesimp
|
||||
|
||||
let fetchDuals = fetchState
|
||||
let storeDuals = storeState
|
||||
|
||||
let lookupDualsM key =
|
||||
fetchDuals >>= fun table ->
|
||||
returnM (node_lookup key table)
|
||||
|
||||
let insertDualsM key value =
|
||||
fetchDuals >>= fun table ->
|
||||
storeDuals (node_insert key value table)
|
||||
|
||||
let rec visit visited vtable parent_table = function
|
||||
[] -> (visited, parent_table)
|
||||
| node :: rest ->
|
||||
match node_lookup node vtable with
|
||||
| Some _ -> visit visited vtable parent_table rest
|
||||
| None ->
|
||||
let children = match node with
|
||||
| Store (v, n) -> [n]
|
||||
| Plus l -> l
|
||||
| Times (a, b) -> [a; b]
|
||||
| CTimes (a, b) -> [a; b]
|
||||
| CTimesJ (a, b) -> [a; b]
|
||||
| Uminus x -> [x]
|
||||
| _ -> []
|
||||
in let rec loop t = function
|
||||
[] -> t
|
||||
| a :: rest ->
|
||||
(match node_lookup a t with
|
||||
None -> loop (node_insert a [node] t) rest
|
||||
| Some c -> loop (node_insert a (node :: c) t) rest)
|
||||
in
|
||||
(visit
|
||||
(node :: visited)
|
||||
(node_insert node () vtable)
|
||||
(loop parent_table children)
|
||||
(children @ rest))
|
||||
|
||||
let make_transposer parent_table =
|
||||
let rec termM node candidate_parent =
|
||||
match candidate_parent with
|
||||
| Store (_, n) when n == node ->
|
||||
dualM candidate_parent >>= fun x' -> returnM [x']
|
||||
| Plus (l) when List.memq node l ->
|
||||
dualM candidate_parent >>= fun x' -> returnM [x']
|
||||
| Times (a, b) when b == node ->
|
||||
dualM candidate_parent >>= fun x' ->
|
||||
returnM [makeTimes (a, x')]
|
||||
| CTimes (a, b) when b == node ->
|
||||
dualM candidate_parent >>= fun x' ->
|
||||
returnM [CTimes (a, x')]
|
||||
| CTimesJ (a, b) when b == node ->
|
||||
dualM candidate_parent >>= fun x' ->
|
||||
returnM [CTimesJ (a, x')]
|
||||
| Uminus n when n == node ->
|
||||
dualM candidate_parent >>= fun x' ->
|
||||
returnM [makeUminus x']
|
||||
| _ -> returnM []
|
||||
|
||||
and dualExpressionM this_node =
|
||||
mapM (termM this_node)
|
||||
(match node_lookup this_node parent_table with
|
||||
| Some a -> a
|
||||
| None -> failwith "bug in dualExpressionM"
|
||||
) >>= fun l ->
|
||||
returnM (makePlus (List.flatten l))
|
||||
|
||||
and dualM this_node =
|
||||
memoizing lookupDualsM insertDualsM
|
||||
(function
|
||||
| Load v as x ->
|
||||
if (Variable.is_constant v) then
|
||||
returnM (Load v)
|
||||
else
|
||||
(dualExpressionM x >>= fun d ->
|
||||
returnM (Store (v, d)))
|
||||
| Store (v, x) -> returnM (Load v)
|
||||
| x -> dualExpressionM x)
|
||||
this_node
|
||||
|
||||
in dualM
|
||||
|
||||
let is_store = function
|
||||
| Store _ -> true
|
||||
| _ -> false
|
||||
|
||||
let transpose dag =
|
||||
let _ = Util.info "begin transpose" in
|
||||
let (all_nodes, parent_table) =
|
||||
visit [] Assoctable.empty Assoctable.empty dag in
|
||||
let transposerM = make_transposer parent_table in
|
||||
let mapTransposerM = mapM transposerM in
|
||||
let duals = runM Assoctable.empty mapTransposerM all_nodes in
|
||||
let roots = List.filter is_store duals in
|
||||
let _ = Util.info "end transpose" in
|
||||
roots
|
||||
end
|
||||
|
||||
|
||||
(*************************************************************
|
||||
* Various dag statistics
|
||||
*************************************************************)
|
||||
module Stats : sig
|
||||
type complexity
|
||||
val complexity : Expr.expr list -> complexity
|
||||
val same_complexity : complexity -> complexity -> bool
|
||||
val leq_complexity : complexity -> complexity -> bool
|
||||
val to_string : complexity -> string
|
||||
end = struct
|
||||
type complexity = int * int * int * int * int * int
|
||||
let rec visit visited vtable = function
|
||||
[] -> visited
|
||||
| node :: rest ->
|
||||
match node_lookup node vtable with
|
||||
Some _ -> visit visited vtable rest
|
||||
| None ->
|
||||
let children = match node with
|
||||
Store (v, n) -> [n]
|
||||
| Plus l -> l
|
||||
| Times (a, b) -> [a; b]
|
||||
| Uminus x -> [x]
|
||||
| _ -> []
|
||||
in visit (node :: visited)
|
||||
(node_insert node () vtable)
|
||||
(children @ rest)
|
||||
|
||||
let complexity dag =
|
||||
let rec loop (load, store, plus, times, uminus, num) = function
|
||||
[] -> (load, store, plus, times, uminus, num)
|
||||
| node :: rest ->
|
||||
loop
|
||||
(match node with
|
||||
| Load _ -> (load + 1, store, plus, times, uminus, num)
|
||||
| Store _ -> (load, store + 1, plus, times, uminus, num)
|
||||
| Plus x -> (load, store, plus + (List.length x - 1), times, uminus, num)
|
||||
| Times _ -> (load, store, plus, times + 1, uminus, num)
|
||||
| Uminus _ -> (load, store, plus, times, uminus + 1, num)
|
||||
| Num _ -> (load, store, plus, times, uminus, num + 1)
|
||||
| CTimes _ -> (load, store, plus, times, uminus, num)
|
||||
| CTimesJ _ -> (load, store, plus, times, uminus, num)
|
||||
| NaN _ -> (load, store, plus, times, uminus, num))
|
||||
rest
|
||||
in let (l, s, p, t, u, n) =
|
||||
loop (0, 0, 0, 0, 0, 0) (visit [] Assoctable.empty dag)
|
||||
in (l, s, p, t, u, n)
|
||||
|
||||
let weight (l, s, p, t, u, n) =
|
||||
l + s + 10 * p + 20 * t + u + n
|
||||
|
||||
let same_complexity a b = weight a = weight b
|
||||
let leq_complexity a b = weight a <= weight b
|
||||
|
||||
let to_string (l, s, p, t, u, n) =
|
||||
Printf.sprintf "ld=%d st=%d add=%d mul=%d uminus=%d num=%d\n"
|
||||
l s p t u n
|
||||
|
||||
end
|
||||
|
||||
(* simplify the dag *)
|
||||
let algsimp v =
|
||||
let rec simplification_loop v =
|
||||
let () = Util.info "simplification step" in
|
||||
let complexity = Stats.complexity v in
|
||||
let () = Util.info ("complexity = " ^ (Stats.to_string complexity)) in
|
||||
let v = (AlgSimp.algsimp @@ Transpose.transpose @@
|
||||
AlgSimp.algsimp @@ Transpose.transpose) v in
|
||||
let complexity' = Stats.complexity v in
|
||||
let () = Util.info ("complexity = " ^ (Stats.to_string complexity')) in
|
||||
if (Stats.leq_complexity complexity' complexity) then
|
||||
let () = Util.info "end algsimp" in
|
||||
v
|
||||
else
|
||||
simplification_loop v
|
||||
|
||||
in
|
||||
let () = Util.info "begin algsimp" in
|
||||
let v = AlgSimp.algsimp v in
|
||||
if !Magic.network_transposition then simplification_loop v else v
|
||||
|
||||
22
fftw-3.3.10/genfft/algsimp.mli
Normal file
22
fftw-3.3.10/genfft/algsimp.mli
Normal file
@@ -0,0 +1,22 @@
|
||||
(*
|
||||
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*)
|
||||
|
||||
val algsimp : Expr.expr list -> Expr.expr list
|
||||
361
fftw-3.3.10/genfft/annotate.ml
Normal file
361
fftw-3.3.10/genfft/annotate.ml
Normal file
@@ -0,0 +1,361 @@
|
||||
(*
|
||||
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*)
|
||||
|
||||
(* Here, we take a schedule (produced by schedule.ml) ordering a
|
||||
sequence of instructions, and produce an annotated schedule. The
|
||||
annotated schedule has the same ordering as the original schedule,
|
||||
but is additionally partitioned into nested blocks of temporary
|
||||
variables. The partitioning is computed via a heuristic algorithm.
|
||||
|
||||
The blocking allows the C code that we generate to consist of
|
||||
nested blocks that help communicate variable lifetimes to the
|
||||
compiler. *)
|
||||
|
||||
open Schedule
|
||||
open Expr
|
||||
open Variable
|
||||
|
||||
type annotated_schedule =
|
||||
Annotate of variable list * variable list * variable list * int * aschedule
|
||||
and aschedule =
|
||||
ADone
|
||||
| AInstr of assignment
|
||||
| ASeq of (annotated_schedule * annotated_schedule)
|
||||
|
||||
let addelem a set = if not (List.memq a set) then a :: set else set
|
||||
let union l =
|
||||
let f x = addelem x (* let is source of polymorphism *)
|
||||
in List.fold_right f l
|
||||
|
||||
(* set difference a - b *)
|
||||
let diff a b = List.filter (fun x -> not (List.memq x b)) a
|
||||
|
||||
let rec minimize f = function
|
||||
[] -> failwith "minimize"
|
||||
| [n] -> n
|
||||
| n :: rest ->
|
||||
let x = minimize f rest in
|
||||
if (f x) >= (f n) then n else x
|
||||
|
||||
(* find all variables used inside a scheduling unit *)
|
||||
let rec find_block_vars = function
|
||||
Done -> []
|
||||
| (Instr (Assign (v, x))) -> v :: (find_vars x)
|
||||
| Par a -> List.flatten (List.map find_block_vars a)
|
||||
| Seq (a, b) -> (find_block_vars a) @ (find_block_vars b)
|
||||
|
||||
let uniq l =
|
||||
List.fold_right (fun a b -> if List.memq a b then b else a :: b) l []
|
||||
|
||||
let has_related x = List.exists (Variable.same_class x)
|
||||
|
||||
let rec overlap a b = Util.count (fun y -> has_related y b) a
|
||||
|
||||
(* reorder a list of schedules so as to maximize overlap of variables *)
|
||||
let reorder l =
|
||||
let rec loop = function
|
||||
[] -> []
|
||||
| (a, va) :: b ->
|
||||
let c =
|
||||
List.map
|
||||
(fun (a, x) -> ((a, x), (overlap va x, List.length x))) b in
|
||||
let c' =
|
||||
List.sort
|
||||
(fun (_, (a, la)) (_, (b, lb)) ->
|
||||
if la < lb || a > b then -1 else 1)
|
||||
c in
|
||||
let b' = List.map (fun (a, _) -> a) c' in
|
||||
a :: (loop b') in
|
||||
let l' = List.map (fun x -> x, uniq (find_block_vars x)) l in
|
||||
(* start with smallest block --- does this matter ? *)
|
||||
match l' with
|
||||
[] -> []
|
||||
| _ ->
|
||||
let m = minimize (fun (_, x) -> (List.length x)) l' in
|
||||
let l'' = Util.remove m l' in
|
||||
loop (m :: l'')
|
||||
|
||||
(* remove Par blocks *)
|
||||
let rec linearize = function
|
||||
| Seq (a, Done) -> linearize a
|
||||
| Seq (Done, a) -> linearize a
|
||||
| Seq (a, b) -> Seq (linearize a, linearize b)
|
||||
|
||||
(* try to balance nested Par blocks *)
|
||||
| Par [a] -> linearize a
|
||||
| Par l ->
|
||||
let n2 = (List.length l) / 2 in
|
||||
let rec loop n a b =
|
||||
if n = 0 then
|
||||
(List.rev b, a)
|
||||
else
|
||||
match a with
|
||||
[] -> failwith "loop"
|
||||
| x :: y -> loop (n - 1) y (x :: b)
|
||||
in let (a, b) = loop n2 (reorder l) []
|
||||
in linearize (Seq (Par a, Par b))
|
||||
|
||||
| x -> x
|
||||
|
||||
let subset a b =
|
||||
List.for_all (fun x -> List.exists (fun y -> x == y) b) a
|
||||
|
||||
let use_same_vars (Assign (av, ax)) (Assign (bv, bx)) =
|
||||
is_temporary av &&
|
||||
is_temporary bv &&
|
||||
(let va = Expr.find_vars ax and vb = Expr.find_vars bx in
|
||||
subset va vb && subset vb va)
|
||||
|
||||
let store_to_same_class (Assign (av, ax)) (Assign (bv, bx)) =
|
||||
is_locative av &&
|
||||
is_locative bv &&
|
||||
Variable.same_class av bv
|
||||
|
||||
let loads_from_same_class (Assign (av, ax)) (Assign (bv, bx)) =
|
||||
match (ax, bx) with
|
||||
| (Load a), (Load b) when
|
||||
Variable.is_locative a && Variable.is_locative b
|
||||
-> Variable.same_class a b
|
||||
| _ -> false
|
||||
|
||||
(* extract instructions from schedule *)
|
||||
let rec sched_to_ilist = function
|
||||
| Done -> []
|
||||
| Instr a -> [a]
|
||||
| Seq (a, b) -> (sched_to_ilist a) @ (sched_to_ilist b)
|
||||
| _ -> failwith "sched_to_ilist" (* Par blocks removed by linearize *)
|
||||
|
||||
let rec find_friends friendp insn friends foes = function
|
||||
| [] -> (friends, foes)
|
||||
| a :: b ->
|
||||
if (a == insn) || (friendp a insn) then
|
||||
find_friends friendp insn (a :: friends) foes b
|
||||
else
|
||||
find_friends friendp insn friends (a :: foes) b
|
||||
|
||||
(* schedule all instructions in the equivalence class determined
|
||||
by friendp at the point where the last one
|
||||
is executed *)
|
||||
let rec delay_friends friendp sched =
|
||||
let rec recur insns = function
|
||||
| Done -> (Done, insns)
|
||||
| Instr a ->
|
||||
let (friends, foes) = find_friends friendp a [] [] insns in
|
||||
(Schedule.sequentially friends), foes
|
||||
| Seq (a, b) ->
|
||||
let (b', insnsb) = recur insns b in
|
||||
let (a', insnsa) = recur insnsb a in
|
||||
(Seq (a', b')), insnsa
|
||||
| _ -> failwith "delay_friends"
|
||||
in match recur (sched_to_ilist sched) sched with
|
||||
| (s, []) -> s (* assert that all insns have been used *)
|
||||
| _ -> failwith "delay_friends"
|
||||
|
||||
(* schedule all instructions in the equivalence class determined
|
||||
by friendp at the point where the first one
|
||||
is executed *)
|
||||
let rec anticipate_friends friendp sched =
|
||||
let rec recur insns = function
|
||||
| Done -> (Done, insns)
|
||||
| Instr a ->
|
||||
let (friends, foes) = find_friends friendp a [] [] insns in
|
||||
(Schedule.sequentially friends), foes
|
||||
| Seq (a, b) ->
|
||||
let (a', insnsa) = recur insns a in
|
||||
let (b', insnsb) = recur insnsa b in
|
||||
(Seq (a', b')), insnsb
|
||||
| _ -> failwith "anticipate_friends"
|
||||
in match recur (sched_to_ilist sched) sched with
|
||||
| (s, []) -> s (* assert that all insns have been used *)
|
||||
| _ -> failwith "anticipate_friends"
|
||||
|
||||
let collect_buddy_stores buddy_list sched =
|
||||
let rec recur sched delayed_stores = match sched with
|
||||
| Done -> (sched, delayed_stores)
|
||||
| Instr (Assign (v, x)) ->
|
||||
begin
|
||||
try
|
||||
let buddies = List.find (List.memq v) buddy_list in
|
||||
let tmp = Variable.make_temporary () in
|
||||
let i = Seq(Instr (Assign (tmp, x)),
|
||||
Instr (Assign (v, Times (NaN MULTI_A, Load tmp))))
|
||||
and delayed_stores = (v, Load tmp) :: delayed_stores in
|
||||
try
|
||||
(Seq (i,
|
||||
Instr (Assign
|
||||
(List.hd buddies,
|
||||
Times (NaN MULTI_B,
|
||||
Plus (List.map
|
||||
(fun buddy ->
|
||||
List.assq buddy
|
||||
delayed_stores)
|
||||
buddies))) )))
|
||||
, delayed_stores
|
||||
with Not_found -> (i, delayed_stores)
|
||||
with Not_found -> (sched, delayed_stores)
|
||||
end
|
||||
| Seq (a, b) ->
|
||||
let (newa, delayed_stores) = recur a delayed_stores in
|
||||
let (newb, delayed_stores) = recur b delayed_stores in
|
||||
(Seq (newa, newb), delayed_stores)
|
||||
| _ -> failwith "collect_buddy_stores"
|
||||
in let (sched, _) = recur sched [] in
|
||||
sched
|
||||
|
||||
let schedule_for_pipeline sched =
|
||||
let update_readytimes t (Assign (v, _)) ready_times =
|
||||
(v, (t + !Magic.pipeline_latency)) :: ready_times
|
||||
and readyp t ready_times (Assign (_, x)) =
|
||||
List.for_all
|
||||
(fun var ->
|
||||
try
|
||||
(List.assq var ready_times) <= t
|
||||
with Not_found -> false)
|
||||
(List.filter Variable.is_temporary (Expr.find_vars x))
|
||||
in
|
||||
let rec recur sched t ready_times delayed_instructions =
|
||||
let (ready, not_ready) =
|
||||
List.partition (readyp t ready_times) delayed_instructions
|
||||
in match ready with
|
||||
| a :: b ->
|
||||
let (sched, t, ready_times, delayed_instructions) =
|
||||
recur sched (t+1) (update_readytimes t a ready_times)
|
||||
(b @ not_ready)
|
||||
in
|
||||
(Seq (Instr a, sched)), t, ready_times, delayed_instructions
|
||||
| _ -> (match sched with
|
||||
| Done -> (sched, t, ready_times, delayed_instructions)
|
||||
| Instr a ->
|
||||
if (readyp t ready_times a) then
|
||||
(sched, (t+1), (update_readytimes t a ready_times),
|
||||
delayed_instructions)
|
||||
else
|
||||
(Done, t, ready_times, (a :: delayed_instructions))
|
||||
| Seq (a, b) ->
|
||||
let (a, t, ready_times, delayed_instructions) =
|
||||
recur a t ready_times delayed_instructions
|
||||
in
|
||||
let (b, t, ready_times, delayed_instructions) =
|
||||
recur b t ready_times delayed_instructions
|
||||
in (Seq (a, b)), t, ready_times, delayed_instructions
|
||||
| _ -> failwith "schedule_for_pipeline")
|
||||
in let rec recur_until_done sched t ready_times delayed_instructions =
|
||||
let (sched, t, ready_times, delayed_instructions) =
|
||||
recur sched t ready_times delayed_instructions
|
||||
in match delayed_instructions with
|
||||
| [] -> sched
|
||||
| _ ->
|
||||
(Seq (sched,
|
||||
(recur_until_done Done (t+1) ready_times
|
||||
delayed_instructions)))
|
||||
in recur_until_done sched 0 [] []
|
||||
|
||||
let rec rewrite_declarations force_declarations
|
||||
(Annotate (_, _, declared, _, what)) =
|
||||
let m = !Magic.number_of_variables in
|
||||
|
||||
let declare_it declared =
|
||||
if (force_declarations || List.length declared >= m) then
|
||||
([], declared)
|
||||
else
|
||||
(declared, [])
|
||||
|
||||
in match what with
|
||||
ADone -> Annotate ([], [], [], 0, what)
|
||||
| AInstr i ->
|
||||
let (u, d) = declare_it declared
|
||||
in Annotate ([], u, d, 0, what)
|
||||
| ASeq (a, b) ->
|
||||
let ma = rewrite_declarations false a
|
||||
and mb = rewrite_declarations false b
|
||||
in let Annotate (_, ua, _, _, _) = ma
|
||||
and Annotate (_, ub, _, _, _) = mb
|
||||
in let (u, d) = declare_it (declared @ ua @ ub)
|
||||
in Annotate ([], u, d, 0, ASeq (ma, mb))
|
||||
|
||||
let annotate list_of_buddy_stores schedule =
|
||||
let rec analyze live_at_end = function
|
||||
Done -> Annotate (live_at_end, [], [], 0, ADone)
|
||||
| Instr i -> (match i with
|
||||
Assign (v, x) ->
|
||||
let vars = (find_vars x) in
|
||||
Annotate (Util.remove v (union live_at_end vars), [v], [],
|
||||
0, AInstr i))
|
||||
| Seq (a, b) ->
|
||||
let ab = analyze live_at_end b in
|
||||
let Annotate (live_at_begin_b, defined_b, _, depth_a, _) = ab in
|
||||
let aa = analyze live_at_begin_b a in
|
||||
let Annotate (live_at_begin_a, defined_a, _, depth_b, _) = aa in
|
||||
let defined = List.filter is_temporary (defined_a @ defined_b) in
|
||||
let declarable = diff defined live_at_end in
|
||||
let undeclarable = diff defined declarable
|
||||
and maxdepth = max depth_a depth_b in
|
||||
Annotate (live_at_begin_a, undeclarable, declarable,
|
||||
List.length declarable + maxdepth,
|
||||
ASeq (aa, ab))
|
||||
| _ -> failwith "really_analyze"
|
||||
|
||||
in
|
||||
let () = Util.info "begin annotate" in
|
||||
let x = linearize schedule in
|
||||
|
||||
let x =
|
||||
if (!Magic.schedule_for_pipeline && !Magic.pipeline_latency > 0) then
|
||||
schedule_for_pipeline x
|
||||
else
|
||||
x
|
||||
in
|
||||
|
||||
let x =
|
||||
if !Magic.reorder_insns then
|
||||
linearize(anticipate_friends use_same_vars x)
|
||||
else
|
||||
x
|
||||
in
|
||||
|
||||
(* delay stores to the real and imaginary parts of the same number *)
|
||||
let x =
|
||||
if !Magic.reorder_stores then
|
||||
linearize(delay_friends store_to_same_class x)
|
||||
else
|
||||
x
|
||||
in
|
||||
|
||||
(* move loads of the real and imaginary parts of the same number *)
|
||||
let x =
|
||||
if !Magic.reorder_loads then
|
||||
linearize(anticipate_friends loads_from_same_class x)
|
||||
else
|
||||
x
|
||||
in
|
||||
|
||||
let x = collect_buddy_stores list_of_buddy_stores x in
|
||||
let x = analyze [] x in
|
||||
let res = rewrite_declarations true x in
|
||||
let () = Util.info "end annotate" in
|
||||
res
|
||||
|
||||
let rec dump print (Annotate (_, _, _, _, code)) =
|
||||
dump_code print code
|
||||
and dump_code print = function
|
||||
| ADone -> ()
|
||||
| AInstr x -> print ((assignment_to_string x) ^ "\n")
|
||||
| ASeq (a, b) -> dump print a; dump print b
|
||||
36
fftw-3.3.10/genfft/annotate.mli
Normal file
36
fftw-3.3.10/genfft/annotate.mli
Normal file
@@ -0,0 +1,36 @@
|
||||
(*
|
||||
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*)
|
||||
|
||||
open Variable
|
||||
open Expr
|
||||
|
||||
type annotated_schedule =
|
||||
Annotate of variable list * variable list * variable list *
|
||||
int * aschedule
|
||||
and aschedule =
|
||||
ADone
|
||||
| AInstr of assignment
|
||||
| ASeq of (annotated_schedule * annotated_schedule)
|
||||
|
||||
val annotate :
|
||||
variable list list -> Schedule.schedule -> annotated_schedule
|
||||
|
||||
val dump : (string -> unit) -> annotated_schedule -> unit
|
||||
65
fftw-3.3.10/genfft/assoctable.ml
Normal file
65
fftw-3.3.10/genfft/assoctable.ml
Normal file
@@ -0,0 +1,65 @@
|
||||
(*
|
||||
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*)
|
||||
|
||||
(*************************************************************
|
||||
* Functional associative table
|
||||
*************************************************************)
|
||||
|
||||
(*
|
||||
* this module implements a functional associative table.
|
||||
* The table is parametrized by an equality predicate and
|
||||
* a hash function, with the restriction that (equal a b) ==>
|
||||
* hash a == hash b.
|
||||
* The table is purely functional and implemented using a binary
|
||||
* search tree (not balanced for now)
|
||||
*)
|
||||
|
||||
type ('a, 'b) elem =
|
||||
Leaf
|
||||
| Node of int * ('a, 'b) elem * ('a, 'b) elem * ('a * 'b) list
|
||||
|
||||
let empty = Leaf
|
||||
|
||||
let lookup hash equal key table =
|
||||
let h = hash key in
|
||||
let rec look = function
|
||||
Leaf -> None
|
||||
| Node (hash_key, left, right, this_list) ->
|
||||
if (hash_key < h) then look left
|
||||
else if (hash_key > h) then look right
|
||||
else let rec loop = function
|
||||
[] -> None
|
||||
| (a, b) :: rest -> if (equal key a) then Some b else loop rest
|
||||
in loop this_list
|
||||
in look table
|
||||
|
||||
let insert hash key value table =
|
||||
let h = hash key in
|
||||
let rec ins = function
|
||||
Leaf -> Node (h, Leaf, Leaf, [(key, value)])
|
||||
| Node (hash_key, left, right, this_list) ->
|
||||
if (hash_key < h) then
|
||||
Node (hash_key, ins left, right, this_list)
|
||||
else if (hash_key > h) then
|
||||
Node (hash_key, left, ins right, this_list)
|
||||
else
|
||||
Node (hash_key, left, right, (key, value) :: this_list)
|
||||
in ins table
|
||||
29
fftw-3.3.10/genfft/assoctable.mli
Normal file
29
fftw-3.3.10/genfft/assoctable.mli
Normal file
@@ -0,0 +1,29 @@
|
||||
(*
|
||||
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*)
|
||||
|
||||
type ('a, 'b) elem =
|
||||
| Leaf
|
||||
| Node of int * ('a, 'b) elem * ('a, 'b) elem * ('a * 'b) list
|
||||
val empty : ('a, 'b) elem
|
||||
val lookup :
|
||||
('a -> int) -> ('a -> 'b -> bool) -> 'a -> ('b, 'c) elem -> 'c option
|
||||
val insert :
|
||||
('a -> int) -> 'a -> 'c -> ('a, 'c) elem -> ('a, 'c) elem
|
||||
461
fftw-3.3.10/genfft/c.ml
Normal file
461
fftw-3.3.10/genfft/c.ml
Normal file
@@ -0,0 +1,461 @@
|
||||
(*
|
||||
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*)
|
||||
|
||||
(*
|
||||
* This module contains the definition of a C-like abstract
|
||||
* syntax tree, and functions to convert ML values into C
|
||||
* programs
|
||||
*)
|
||||
|
||||
open Expr
|
||||
open Annotate
|
||||
open List
|
||||
|
||||
let realtype = "R"
|
||||
let realtypep = realtype ^ " *"
|
||||
let extended_realtype = "E"
|
||||
let constrealtype = "const " ^ realtype
|
||||
let constrealtypep = constrealtype ^ " *"
|
||||
|
||||
let stridetype = "stride"
|
||||
|
||||
(***********************************
|
||||
* C program structure
|
||||
***********************************)
|
||||
type c_decl =
|
||||
| Decl of string * string
|
||||
| Tdecl of string (* arbitrary text declaration *)
|
||||
|
||||
and c_ast =
|
||||
| Asch of annotated_schedule
|
||||
| Simd_leavefun
|
||||
| Return of c_ast
|
||||
| For of c_ast * c_ast * c_ast * c_ast
|
||||
| If of c_ast * c_ast
|
||||
| Block of (c_decl list) * (c_ast list)
|
||||
| Binop of string * c_ast * c_ast
|
||||
| Expr_assign of c_ast * c_ast
|
||||
| Stmt_assign of c_ast * c_ast
|
||||
| Comma of c_ast * c_ast
|
||||
| Integer of int
|
||||
| CVar of string
|
||||
| CCall of string * c_ast
|
||||
| CPlus of c_ast list
|
||||
| ITimes of c_ast * c_ast
|
||||
| CUminus of c_ast
|
||||
and c_fcn = Fcn of string * string * (c_decl list) * c_ast
|
||||
|
||||
|
||||
let ctimes = function
|
||||
| (Integer 1), a -> a
|
||||
| a, (Integer 1) -> a
|
||||
| a, b -> ITimes (a, b)
|
||||
|
||||
(*
|
||||
* C AST unparser
|
||||
*)
|
||||
let foldr_string_concat l = fold_right (^) l ""
|
||||
|
||||
let rec unparse_expr_c =
|
||||
let yes x = x and no x = "" in
|
||||
|
||||
let rec unparse_plus maybe =
|
||||
let maybep = maybe " + " in
|
||||
function
|
||||
| [] -> ""
|
||||
| (Uminus (Times (a, b))) :: (Uminus c) :: d ->
|
||||
maybep ^ (op "FNMA" a b c) ^ (unparse_plus yes d)
|
||||
| (Uminus c) :: (Uminus (Times (a, b))) :: d ->
|
||||
maybep ^ (op "FNMA" a b c) ^ (unparse_plus yes d)
|
||||
| (Uminus (Times (a, b))) :: c :: d ->
|
||||
maybep ^ (op "FNMS" a b c) ^ (unparse_plus yes d)
|
||||
| c :: (Uminus (Times (a, b))) :: d ->
|
||||
maybep ^ (op "FNMS" a b c) ^ (unparse_plus yes d)
|
||||
| (Times (a, b)) :: (Uminus c) :: d ->
|
||||
maybep ^ (op "FMS" a b c) ^ (unparse_plus yes d)
|
||||
| (Uminus c) :: (Times (a, b)) :: d ->
|
||||
maybep ^ (op "FMS" a b c) ^ (unparse_plus yes d)
|
||||
| (Times (a, b)) :: c :: d ->
|
||||
maybep ^ (op "FMA" a b c) ^ (unparse_plus yes d)
|
||||
| c :: (Times (a, b)) :: d ->
|
||||
maybep ^ (op "FMA" a b c) ^ (unparse_plus yes d)
|
||||
| (Uminus a :: b) ->
|
||||
" - " ^ (parenthesize a) ^ (unparse_plus yes b)
|
||||
| (a :: b) ->
|
||||
maybep ^ (parenthesize a) ^ (unparse_plus yes b)
|
||||
and parenthesize x = match x with
|
||||
| (Load _) -> unparse_expr_c x
|
||||
| (Num _) -> unparse_expr_c x
|
||||
| _ -> "(" ^ (unparse_expr_c x) ^ ")"
|
||||
and op nam a b c =
|
||||
nam ^ "(" ^ (unparse_expr_c a) ^ ", " ^ (unparse_expr_c b) ^ ", " ^
|
||||
(unparse_expr_c c) ^ ")"
|
||||
|
||||
in function
|
||||
| Load v -> Variable.unparse v
|
||||
| Num n -> Number.to_konst n
|
||||
| Plus [] -> "0.0 /* bug */"
|
||||
| Plus [a] -> " /* bug */ " ^ (unparse_expr_c a)
|
||||
| Plus a -> (unparse_plus no a)
|
||||
| Times (a, b) -> (parenthesize a) ^ " * " ^ (parenthesize b)
|
||||
| Uminus (Plus [a; Uminus b]) -> unparse_plus no [b; Uminus a]
|
||||
| Uminus a -> "- " ^ (parenthesize a)
|
||||
| _ -> failwith "unparse_expr_c"
|
||||
|
||||
and unparse_expr_generic =
|
||||
let rec u x = unparse_expr_generic x
|
||||
and unary op a = Printf.sprintf "%s(%s)" op (u a)
|
||||
and binary op a b = Printf.sprintf "%s(%s, %s)" op (u a) (u b)
|
||||
and ternary op a b c = Printf.sprintf "%s(%s, %s, %s)" op (u a) (u b) (u c)
|
||||
and quaternary op a b c d =
|
||||
Printf.sprintf "%s(%s, %s, %s, %s)" op (u a) (u b) (u c) (u d)
|
||||
and unparse_plus = function
|
||||
| [(Uminus (Times (a, b))); Times (c, d)] -> quaternary "FNMMS" a b c d
|
||||
| [Times (c, d); (Uminus (Times (a, b)))] -> quaternary "FNMMS" a b c d
|
||||
| [Times (c, d); (Times (a, b))] -> quaternary "FMMA" a b c d
|
||||
| [(Uminus (Times (a, b))); c] -> ternary "FNMS" a b c
|
||||
| [c; (Uminus (Times (a, b)))] -> ternary "FNMS" a b c
|
||||
| [(Uminus c); (Times (a, b))] -> ternary "FMS" a b c
|
||||
| [(Times (a, b)); (Uminus c)] -> ternary "FMS" a b c
|
||||
| [c; (Times (a, b))] -> ternary "FMA" a b c
|
||||
| [(Times (a, b)); c] -> ternary "FMA" a b c
|
||||
| [a; Uminus b] -> binary "SUB" a b
|
||||
| [a; b] -> binary "ADD" a b
|
||||
| a :: b :: c -> binary "ADD" a (Plus (b :: c))
|
||||
| _ -> failwith "unparse_plus"
|
||||
in function
|
||||
| Load v -> Variable.unparse v
|
||||
| Num n -> Number.to_konst n
|
||||
| Plus a -> unparse_plus a
|
||||
| Times (a, b) -> binary "MUL" a b
|
||||
| Uminus a -> unary "NEG" a
|
||||
| _ -> failwith "unparse_expr"
|
||||
|
||||
and unparse_expr x =
|
||||
if !Magic.generic_arith then
|
||||
unparse_expr_generic x
|
||||
else
|
||||
unparse_expr_c x
|
||||
|
||||
and unparse_assignment (Assign (v, x)) =
|
||||
(Variable.unparse v) ^ " = " ^ (unparse_expr x) ^ ";\n"
|
||||
|
||||
and unparse_annotated force_bracket =
|
||||
let rec unparse_code = function
|
||||
ADone -> ""
|
||||
| AInstr i -> unparse_assignment i
|
||||
| ASeq (a, b) ->
|
||||
(unparse_annotated false a) ^ (unparse_annotated false b)
|
||||
and declare_variables l =
|
||||
let rec uvar = function
|
||||
[] -> failwith "uvar"
|
||||
| [v] -> (Variable.unparse v) ^ ";\n"
|
||||
| a :: b -> (Variable.unparse a) ^ ", " ^ (uvar b)
|
||||
in let rec vvar l =
|
||||
let s = if !Magic.compact then 15 else 1 in
|
||||
if (List.length l <= s) then
|
||||
match l with
|
||||
[] -> ""
|
||||
| _ -> extended_realtype ^ " " ^ (uvar l)
|
||||
else
|
||||
(vvar (Util.take s l)) ^ (vvar (Util.drop s l))
|
||||
in vvar (List.filter Variable.is_temporary l)
|
||||
in function
|
||||
Annotate (_, _, decl, _, code) ->
|
||||
if (not force_bracket) && (Util.null decl) then
|
||||
unparse_code code
|
||||
else "{\n" ^
|
||||
(declare_variables decl) ^
|
||||
(unparse_code code) ^
|
||||
"}\n"
|
||||
|
||||
and unparse_decl = function
|
||||
| Decl (a, b) -> a ^ " " ^ b ^ ";\n"
|
||||
| Tdecl x -> x
|
||||
|
||||
and unparse_ast =
|
||||
let rec unparse_plus = function
|
||||
| [] -> ""
|
||||
| (CUminus a :: b) -> " - " ^ (parenthesize a) ^ (unparse_plus b)
|
||||
| (a :: b) -> " + " ^ (parenthesize a) ^ (unparse_plus b)
|
||||
and parenthesize x = match x with
|
||||
| (CVar _) -> unparse_ast x
|
||||
| (CCall _) -> unparse_ast x
|
||||
| (Integer _) -> unparse_ast x
|
||||
| _ -> "(" ^ (unparse_ast x) ^ ")"
|
||||
|
||||
in
|
||||
function
|
||||
| Asch a -> (unparse_annotated true a)
|
||||
| Simd_leavefun -> "" (* used only in SIMD code *)
|
||||
| Return x -> "return " ^ unparse_ast x ^ ";"
|
||||
| For (a, b, c, d) ->
|
||||
"for (" ^
|
||||
unparse_ast a ^ "; " ^ unparse_ast b ^ "; " ^ unparse_ast c
|
||||
^ ")" ^ unparse_ast d
|
||||
| If (a, d) ->
|
||||
"if (" ^
|
||||
unparse_ast a
|
||||
^ ")" ^ unparse_ast d
|
||||
| Block (d, s) ->
|
||||
if (s == []) then ""
|
||||
else
|
||||
"{\n" ^
|
||||
foldr_string_concat (map unparse_decl d) ^
|
||||
foldr_string_concat (map unparse_ast s) ^
|
||||
"}\n"
|
||||
| Binop (op, a, b) -> (unparse_ast a) ^ op ^ (unparse_ast b)
|
||||
| Expr_assign (a, b) -> (unparse_ast a) ^ " = " ^ (unparse_ast b)
|
||||
| Stmt_assign (a, b) -> (unparse_ast a) ^ " = " ^ (unparse_ast b) ^ ";\n"
|
||||
| Comma (a, b) -> (unparse_ast a) ^ ", " ^ (unparse_ast b)
|
||||
| Integer i -> string_of_int i
|
||||
| CVar s -> s
|
||||
| CCall (s, x) -> s ^ "(" ^ (unparse_ast x) ^ ")"
|
||||
| CPlus [] -> "0 /* bug */"
|
||||
| CPlus [a] -> " /* bug */ " ^ (unparse_ast a)
|
||||
| CPlus (a::b) -> (parenthesize a) ^ (unparse_plus b)
|
||||
| ITimes (a, b) -> (parenthesize a) ^ " * " ^ (parenthesize b)
|
||||
| CUminus a -> "- " ^ (parenthesize a)
|
||||
|
||||
and unparse_function = function
|
||||
Fcn (typ, name, args, body) ->
|
||||
let rec unparse_args = function
|
||||
[Decl (a, b)] -> a ^ " " ^ b
|
||||
| (Decl (a, b)) :: s -> a ^ " " ^ b ^ ", "
|
||||
^ unparse_args s
|
||||
| [] -> ""
|
||||
| _ -> failwith "unparse_function"
|
||||
in
|
||||
(typ ^ " " ^ name ^ "(" ^ unparse_args args ^ ")\n" ^
|
||||
unparse_ast body)
|
||||
|
||||
|
||||
(*************************************************************
|
||||
* traverse a a function and return a list of all expressions,
|
||||
* in the execution order
|
||||
**************************************************************)
|
||||
let rec fcn_to_expr_list = fun (Fcn (_, _, _, body)) -> ast_to_expr_list body
|
||||
and acode_to_expr_list = function
|
||||
AInstr (Assign (_, x)) -> [x]
|
||||
| ASeq (a, b) ->
|
||||
(asched_to_expr_list a) @ (asched_to_expr_list b)
|
||||
| _ -> []
|
||||
and asched_to_expr_list (Annotate (_, _, _, _, code)) =
|
||||
acode_to_expr_list code
|
||||
and ast_to_expr_list = function
|
||||
Asch a -> asched_to_expr_list a
|
||||
| Block (_, a) -> flatten (map ast_to_expr_list a)
|
||||
| For (_, _, _, body) -> ast_to_expr_list body
|
||||
| If (_, body) -> ast_to_expr_list body
|
||||
| _ -> []
|
||||
|
||||
(***********************
|
||||
* Extracting Constants
|
||||
***********************)
|
||||
|
||||
(* add a new key & value to a list of (key,value) pairs, where
|
||||
the keys are floats and each key is unique up to almost_equal *)
|
||||
|
||||
let extract_constants f =
|
||||
let constlist = flatten (map expr_to_constants (ast_to_expr_list f))
|
||||
in map
|
||||
(fun n ->
|
||||
Tdecl
|
||||
("DK(" ^ (Number.to_konst n) ^ ", " ^ (Number.to_string n) ^
|
||||
");\n"))
|
||||
(unique_constants constlist)
|
||||
|
||||
(******************************
|
||||
Extracting operation counts
|
||||
******************************)
|
||||
|
||||
let count_stack_vars =
|
||||
let rec count_acode = function
|
||||
| ASeq (a, b) -> max (count_asched a) (count_asched b)
|
||||
| _ -> 0
|
||||
and count_asched (Annotate (_, _, decl, _, code)) =
|
||||
(length decl) + (count_acode code)
|
||||
and count_ast = function
|
||||
| Asch a -> count_asched a
|
||||
| Block (d, a) -> (length d) + (Util.max_list (map count_ast a))
|
||||
| For (_, _, _, body) -> count_ast body
|
||||
| If (_, body) -> count_ast body
|
||||
| _ -> 0
|
||||
in function (Fcn (_, _, _, body)) -> count_ast body
|
||||
|
||||
let count_memory_acc f =
|
||||
let rec count_var v =
|
||||
if (Variable.is_locative v) then 1 else 0
|
||||
and count_acode = function
|
||||
| AInstr (Assign (v, _)) -> count_var v
|
||||
| ASeq (a, b) -> (count_asched a) + (count_asched b)
|
||||
| _ -> 0
|
||||
and count_asched = function
|
||||
Annotate (_, _, _, _, code) -> count_acode code
|
||||
and count_ast = function
|
||||
| Asch a -> count_asched a
|
||||
| Block (_, a) -> (Util.sum_list (map count_ast a))
|
||||
| Comma (a, b) -> (count_ast a) + (count_ast b)
|
||||
| For (_, _, _, body) -> count_ast body
|
||||
| If (_, body) -> count_ast body
|
||||
| _ -> 0
|
||||
and count_acc_expr_func acc = function
|
||||
| Load v -> acc + (count_var v)
|
||||
| Plus a -> fold_left count_acc_expr_func acc a
|
||||
| Times (a, b) -> fold_left count_acc_expr_func acc [a; b]
|
||||
| Uminus a -> count_acc_expr_func acc a
|
||||
| _ -> acc
|
||||
in let (Fcn (typ, name, args, body)) = f
|
||||
in (count_ast body) +
|
||||
fold_left count_acc_expr_func 0 (fcn_to_expr_list f)
|
||||
|
||||
let good_for_fma = To_alist.good_for_fma
|
||||
|
||||
let build_fma = function
|
||||
| [a; Times (b, c)] when good_for_fma (b, c) -> Some (a, b, c)
|
||||
| [Times (b, c); a] when good_for_fma (b, c) -> Some (a, b, c)
|
||||
| [a; Uminus (Times (b, c))] when good_for_fma (b, c) -> Some (a, b, c)
|
||||
| [Uminus (Times (b, c)); a] when good_for_fma (b, c) -> Some (a, b, c)
|
||||
| _ -> None
|
||||
|
||||
let rec count_flops_expr_func (adds, mults, fmas) = function
|
||||
| Plus [] -> (adds, mults, fmas)
|
||||
| Plus ([_; _] as a) ->
|
||||
begin
|
||||
match build_fma a with
|
||||
| None ->
|
||||
fold_left count_flops_expr_func
|
||||
(adds + (length a) - 1, mults, fmas) a
|
||||
| Some (a, b, c) ->
|
||||
fold_left count_flops_expr_func (adds, mults, fmas+1) [a; b; c]
|
||||
end
|
||||
| Plus (a :: b) ->
|
||||
count_flops_expr_func (adds, mults, fmas) (Plus [a; Plus b])
|
||||
| Times (NaN MULTI_A,_) -> (adds, mults, fmas)
|
||||
| Times (NaN MULTI_B,_) -> (adds, mults, fmas)
|
||||
| Times (NaN I,b) -> count_flops_expr_func (adds, mults, fmas) b
|
||||
| Times (NaN CONJ,b) -> count_flops_expr_func (adds, mults, fmas) b
|
||||
| Times (a,b) -> fold_left count_flops_expr_func (adds, mults+1, fmas) [a; b]
|
||||
| CTimes (a,b) ->
|
||||
fold_left count_flops_expr_func (adds+1, mults+2, fmas) [a; b]
|
||||
| CTimesJ (a,b) ->
|
||||
fold_left count_flops_expr_func (adds+1, mults+2, fmas) [a; b]
|
||||
| Uminus a -> count_flops_expr_func (adds, mults, fmas) a
|
||||
| _ -> (adds, mults, fmas)
|
||||
|
||||
let count_flops f =
|
||||
fold_left count_flops_expr_func (0, 0, 0) (fcn_to_expr_list f)
|
||||
|
||||
let count_constants f =
|
||||
length (unique_constants (flatten (map expr_to_constants (fcn_to_expr_list f))))
|
||||
|
||||
let arith_complexity f =
|
||||
let (a, m, fmas) = count_flops f
|
||||
and v = count_stack_vars f
|
||||
and c = count_constants f
|
||||
and mem = count_memory_acc f
|
||||
in (a, m, fmas, v, c, mem)
|
||||
|
||||
(* print the operation costs *)
|
||||
let print_cost f =
|
||||
let Fcn (_, _, _, _) = f
|
||||
and (a, m, fmas, v, c, mem) = arith_complexity f
|
||||
in
|
||||
"/*\n"^
|
||||
" * This function contains " ^
|
||||
(string_of_int (a + fmas)) ^ " FP additions, " ^
|
||||
(string_of_int (m + fmas)) ^ " FP multiplications,\n" ^
|
||||
" * (or, " ^
|
||||
(string_of_int a) ^ " additions, " ^
|
||||
(string_of_int m) ^ " multiplications, " ^
|
||||
(string_of_int fmas) ^ " fused multiply/add),\n" ^
|
||||
" * " ^ (string_of_int v) ^ " stack variables, " ^
|
||||
(string_of_int c) ^ " constants, and " ^
|
||||
(string_of_int mem) ^ " memory accesses\n" ^
|
||||
" */\n"
|
||||
|
||||
(*****************************************
|
||||
* functions that create C arrays
|
||||
*****************************************)
|
||||
type stride =
|
||||
| SVar of string
|
||||
| SConst of string
|
||||
| SInteger of int
|
||||
| SNeg of stride
|
||||
|
||||
type sstride =
|
||||
| Simple of int
|
||||
| Constant of (string * int)
|
||||
| Composite of (string * int)
|
||||
| Negative of sstride
|
||||
|
||||
let rec simplify_stride stride i =
|
||||
match (stride, i) with
|
||||
(_, 0) -> Simple 0
|
||||
| (SInteger n, i) -> Simple (n * i)
|
||||
| (SConst s, i) -> Constant (s, i)
|
||||
| (SVar s, i) -> Composite (s, i)
|
||||
| (SNeg x, i) ->
|
||||
match (simplify_stride x i) with
|
||||
| Negative y -> y
|
||||
| y -> Negative y
|
||||
|
||||
let rec cstride_to_string = function
|
||||
| Simple i -> string_of_int i
|
||||
| Constant (s, i) ->
|
||||
if !Magic.lisp_syntax then
|
||||
"(* " ^ s ^ " " ^ (string_of_int i) ^ ")"
|
||||
else
|
||||
s ^ " * " ^ (string_of_int i)
|
||||
| Composite (s, i) ->
|
||||
if !Magic.lisp_syntax then
|
||||
"(* " ^ s ^ " " ^ (string_of_int i) ^ ")"
|
||||
else
|
||||
"WS(" ^ s ^ ", " ^ (string_of_int i) ^ ")"
|
||||
| Negative x -> "-" ^ cstride_to_string x
|
||||
|
||||
let aref name index =
|
||||
if !Magic.lisp_syntax then
|
||||
Printf.sprintf "(aref %s %s)" name index
|
||||
else
|
||||
Printf.sprintf "%s[%s]" name index
|
||||
|
||||
let array_subscript name stride k =
|
||||
aref name (cstride_to_string (simplify_stride stride k))
|
||||
|
||||
let varray_subscript name vstride stride v i =
|
||||
let vindex = simplify_stride vstride v
|
||||
and iindex = simplify_stride stride i
|
||||
in
|
||||
let index =
|
||||
match (vindex, iindex) with
|
||||
(Simple vi, Simple ii) -> string_of_int (vi + ii)
|
||||
| (Simple 0, x) -> cstride_to_string x
|
||||
| (x, Simple 0) -> cstride_to_string x
|
||||
| _ -> (cstride_to_string vindex) ^ " + " ^ (cstride_to_string iindex)
|
||||
in aref name index
|
||||
|
||||
let real_of s = "c_re(" ^ s ^ ")"
|
||||
let imag_of s = "c_im(" ^ s ^ ")"
|
||||
|
||||
let flops_of f =
|
||||
let (add, mul, fma) = count_flops f in
|
||||
Printf.sprintf "{ %d, %d, %d, 0 }" add mul fma
|
||||
74
fftw-3.3.10/genfft/c.mli
Normal file
74
fftw-3.3.10/genfft/c.mli
Normal file
@@ -0,0 +1,74 @@
|
||||
(*
|
||||
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*)
|
||||
|
||||
type stride =
|
||||
| SVar of string
|
||||
| SConst of string
|
||||
| SInteger of int
|
||||
| SNeg of stride
|
||||
val array_subscript : string -> stride -> int -> string
|
||||
val varray_subscript : string -> stride -> stride -> int -> int -> string
|
||||
|
||||
val real_of : string -> string
|
||||
val imag_of : string -> string
|
||||
|
||||
val realtype : string
|
||||
val realtypep : string
|
||||
val constrealtype : string
|
||||
val constrealtypep : string
|
||||
val stridetype : string
|
||||
|
||||
type c_decl =
|
||||
| Decl of string * string
|
||||
| Tdecl of string (* arbitrary text declaration *)
|
||||
|
||||
and c_ast =
|
||||
| Asch of Annotate.annotated_schedule
|
||||
| Simd_leavefun
|
||||
| Return of c_ast
|
||||
| For of c_ast * c_ast * c_ast * c_ast
|
||||
| If of c_ast * c_ast
|
||||
| Block of (c_decl list) * (c_ast list)
|
||||
| Binop of string * c_ast * c_ast
|
||||
| Expr_assign of c_ast * c_ast
|
||||
| Stmt_assign of c_ast * c_ast
|
||||
| Comma of c_ast * c_ast
|
||||
| Integer of int
|
||||
| CVar of string
|
||||
| CCall of string * c_ast
|
||||
| CPlus of c_ast list
|
||||
| ITimes of c_ast * c_ast
|
||||
| CUminus of c_ast
|
||||
and c_fcn = | Fcn of string * string * c_decl list * c_ast
|
||||
|
||||
val unparse_expr : Expr.expr -> string
|
||||
val unparse_assignment : Expr.assignment -> string
|
||||
val unparse_annotated : bool -> Annotate.annotated_schedule -> string
|
||||
val unparse_decl : c_decl -> string
|
||||
val unparse_ast : c_ast -> string
|
||||
val unparse_function : c_fcn -> string
|
||||
|
||||
val flops_of : c_fcn -> string
|
||||
val print_cost : c_fcn -> string
|
||||
|
||||
val ast_to_expr_list : c_ast -> Expr.expr list
|
||||
val extract_constants : c_ast -> c_decl list
|
||||
val ctimes : (c_ast * c_ast) -> c_ast
|
||||
147
fftw-3.3.10/genfft/complex.ml
Normal file
147
fftw-3.3.10/genfft/complex.ml
Normal file
@@ -0,0 +1,147 @@
|
||||
(*
|
||||
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*)
|
||||
|
||||
(* abstraction layer for complex operations *)
|
||||
open Littlesimp
|
||||
open Expr
|
||||
|
||||
(* type of complex expressions *)
|
||||
type expr = CE of Expr.expr * Expr.expr
|
||||
|
||||
let two = CE (makeNum Number.two, makeNum Number.zero)
|
||||
let one = CE (makeNum Number.one, makeNum Number.zero)
|
||||
let i = CE (makeNum Number.zero, makeNum Number.one)
|
||||
let zero = CE (makeNum Number.zero, makeNum Number.zero)
|
||||
let make (r, i) = CE (r, i)
|
||||
|
||||
let uminus (CE (a, b)) = CE (makeUminus a, makeUminus b)
|
||||
|
||||
let inverse_int n = CE (makeNum (Number.div Number.one (Number.of_int n)),
|
||||
makeNum Number.zero)
|
||||
|
||||
let inverse_int_sqrt n =
|
||||
CE (makeNum (Number.div Number.one (Number.sqrt (Number.of_int n))),
|
||||
makeNum Number.zero)
|
||||
let int_sqrt n =
|
||||
CE (makeNum (Number.sqrt (Number.of_int n)),
|
||||
makeNum Number.zero)
|
||||
|
||||
let nan x = CE (NaN x, makeNum Number.zero)
|
||||
|
||||
let half = inverse_int 2
|
||||
|
||||
let times3x3 (CE (a, b)) (CE (c, d)) =
|
||||
CE (makePlus [makeTimes (c, makePlus [a; makeUminus (b)]);
|
||||
makeTimes (b, makePlus [c; makeUminus (d)])],
|
||||
makePlus [makeTimes (a, makePlus [c; d]);
|
||||
makeUminus(makeTimes (c, makePlus [a; makeUminus (b)]))])
|
||||
|
||||
let times (CE (a, b)) (CE (c, d)) =
|
||||
if not !Magic.threemult then
|
||||
CE (makePlus [makeTimes (a, c); makeUminus (makeTimes (b, d))],
|
||||
makePlus [makeTimes (a, d); makeTimes (b, c)])
|
||||
else if is_constant c && is_constant d then
|
||||
times3x3 (CE (a, b)) (CE (c, d))
|
||||
else (* hope a and b are constant expressions *)
|
||||
times3x3 (CE (c, d)) (CE (a, b))
|
||||
|
||||
let ctimes (CE (a, _)) (CE (c, _)) =
|
||||
CE (CTimes (a, c), makeNum Number.zero)
|
||||
|
||||
let ctimesj (CE (a, _)) (CE (c, _)) =
|
||||
CE (CTimesJ (a, c), makeNum Number.zero)
|
||||
|
||||
(* complex exponential (of root of unity); returns exp(2*pi*i/n * m) *)
|
||||
let exp n i =
|
||||
let (c, s) = Number.cexp n i
|
||||
in CE (makeNum c, makeNum s)
|
||||
|
||||
(* various trig functions evaluated at (2*pi*i/n * m) *)
|
||||
let sec n m =
|
||||
let (c, s) = Number.cexp n m
|
||||
in CE (makeNum (Number.div Number.one c), makeNum Number.zero)
|
||||
let csc n m =
|
||||
let (c, s) = Number.cexp n m
|
||||
in CE (makeNum (Number.div Number.one s), makeNum Number.zero)
|
||||
let tan n m =
|
||||
let (c, s) = Number.cexp n m
|
||||
in CE (makeNum (Number.div s c), makeNum Number.zero)
|
||||
let cot n m =
|
||||
let (c, s) = Number.cexp n m
|
||||
in CE (makeNum (Number.div c s), makeNum Number.zero)
|
||||
|
||||
(* complex sum *)
|
||||
let plus a =
|
||||
let rec unzip_complex = function
|
||||
[] -> ([], [])
|
||||
| ((CE (a, b)) :: s) ->
|
||||
let (r,i) = unzip_complex s
|
||||
in
|
||||
(a::r), (b::i) in
|
||||
let (c, d) = unzip_complex a in
|
||||
CE (makePlus c, makePlus d)
|
||||
|
||||
(* extract real/imaginary *)
|
||||
let real (CE (a, b)) = CE (a, makeNum Number.zero)
|
||||
let imag (CE (a, b)) = CE (b, makeNum Number.zero)
|
||||
let iimag (CE (a, b)) = CE (makeNum Number.zero, b)
|
||||
let conj (CE (a, b)) = CE (a, makeUminus b)
|
||||
|
||||
|
||||
(* abstraction of sum_{i=0}^{n-1} *)
|
||||
let sigma a b f = plus (List.map f (Util.interval a b))
|
||||
|
||||
(* store and assignment operations *)
|
||||
let store_real v (CE (a, b)) = Expr.Store (v, a)
|
||||
let store_imag v (CE (a, b)) = Expr.Store (v, b)
|
||||
let store (vr, vi) x = (store_real vr x, store_imag vi x)
|
||||
|
||||
let assign_real v (CE (a, b)) = Expr.Assign (v, a)
|
||||
let assign_imag v (CE (a, b)) = Expr.Assign (v, b)
|
||||
let assign (vr, vi) x = (assign_real vr x, assign_imag vi x)
|
||||
|
||||
|
||||
(************************
|
||||
shortcuts
|
||||
************************)
|
||||
let (@*) = times
|
||||
let (@+) a b = plus [a; b]
|
||||
let (@-) a b = plus [a; uminus b]
|
||||
|
||||
(* type of complex signals *)
|
||||
type signal = int -> expr
|
||||
|
||||
(* make a finite signal infinite *)
|
||||
let infinite n signal i = if ((0 <= i) && (i < n)) then signal i else zero
|
||||
|
||||
let hermitian n a =
|
||||
Util.array n (fun i ->
|
||||
if (i = 0) then real (a 0)
|
||||
else if (i < n - i) then (a i)
|
||||
else if (i > n - i) then conj (a (n - i))
|
||||
else real (a i))
|
||||
|
||||
let antihermitian n a =
|
||||
Util.array n (fun i ->
|
||||
if (i = 0) then iimag (a 0)
|
||||
else if (i < n - i) then (a i)
|
||||
else if (i > n - i) then uminus (conj (a (n - i)))
|
||||
else iimag (a i))
|
||||
68
fftw-3.3.10/genfft/complex.mli
Normal file
68
fftw-3.3.10/genfft/complex.mli
Normal file
@@ -0,0 +1,68 @@
|
||||
(*
|
||||
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*)
|
||||
|
||||
type expr
|
||||
val make : (Expr.expr * Expr.expr) -> expr
|
||||
val two : expr
|
||||
val one : expr
|
||||
val i : expr
|
||||
val zero : expr
|
||||
val half : expr
|
||||
val inverse_int : int -> expr
|
||||
val inverse_int_sqrt : int -> expr
|
||||
val int_sqrt : int -> expr
|
||||
val times : expr -> expr -> expr
|
||||
val ctimes : expr -> expr -> expr
|
||||
val ctimesj : expr -> expr -> expr
|
||||
val uminus : expr -> expr
|
||||
val exp : int -> int -> expr
|
||||
val sec : int -> int -> expr
|
||||
val csc : int -> int -> expr
|
||||
val tan : int -> int -> expr
|
||||
val cot : int -> int -> expr
|
||||
val plus : expr list -> expr
|
||||
val real : expr -> expr
|
||||
val imag : expr -> expr
|
||||
val conj : expr -> expr
|
||||
val nan : Expr.transcendent -> expr
|
||||
val sigma : int -> int -> (int -> expr) -> expr
|
||||
|
||||
val (@*) : expr -> expr -> expr
|
||||
val (@+) : expr -> expr -> expr
|
||||
val (@-) : expr -> expr -> expr
|
||||
|
||||
(* a signal is a map from integers to expressions *)
|
||||
type signal = int -> expr
|
||||
val infinite : int -> signal -> signal
|
||||
|
||||
val store_real : Variable.variable -> expr -> Expr.expr
|
||||
val store_imag : Variable.variable -> expr -> Expr.expr
|
||||
val store :
|
||||
Variable.variable * Variable.variable -> expr -> Expr.expr * Expr.expr
|
||||
|
||||
val assign_real : Variable.variable -> expr -> Expr.assignment
|
||||
val assign_imag : Variable.variable -> expr -> Expr.assignment
|
||||
val assign :
|
||||
Variable.variable * Variable.variable ->
|
||||
expr -> Expr.assignment * Expr.assignment
|
||||
|
||||
val hermitian : int -> (int -> expr) -> int -> expr
|
||||
val antihermitian : int -> (int -> expr) -> int -> expr
|
||||
130
fftw-3.3.10/genfft/conv.ml
Normal file
130
fftw-3.3.10/genfft/conv.ml
Normal file
@@ -0,0 +1,130 @@
|
||||
(*
|
||||
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*)
|
||||
|
||||
open Complex
|
||||
open Util
|
||||
|
||||
let polyphase m a ph i = a (m * i + ph)
|
||||
|
||||
let rec divmod n i =
|
||||
if (i < 0) then
|
||||
let (a, b) = divmod n (i + n)
|
||||
in (a - 1, b)
|
||||
else (i / n, i mod n)
|
||||
|
||||
let unpolyphase m a i = let (x, y) = divmod m i in a y x
|
||||
|
||||
let lift2 f a b i = f (a i) (b i)
|
||||
|
||||
(* convolution of signals A and B *)
|
||||
let rec conv na a nb b =
|
||||
let rec naive na a nb b i =
|
||||
sigma 0 na (fun j -> (a j) @* (b (i - j)))
|
||||
|
||||
and recur na a nb b =
|
||||
if (na <= 1 || nb <= 1) then
|
||||
naive na a nb b
|
||||
else
|
||||
let p = polyphase 2 in
|
||||
let ee = conv (na - na / 2) (p a 0) (nb - nb / 2) (p b 0)
|
||||
and eo = conv (na - na / 2) (p a 0) (nb / 2) (p b 1)
|
||||
and oe = conv (na / 2) (p a 1) (nb - nb / 2) (p b 0)
|
||||
and oo = conv (na / 2) (p a 1) (nb / 2) (p b 1) in
|
||||
unpolyphase 2 (function
|
||||
0 -> fun i -> (ee i) @+ (oo (i - 1))
|
||||
| 1 -> fun i -> (eo i) @+ (oe i)
|
||||
| _ -> failwith "recur")
|
||||
|
||||
|
||||
(* Karatsuba variant 1: (a+bx)(c+dx) = (ac+bdxx)+((a+b)(c+d)-ac-bd)x *)
|
||||
and karatsuba1 na a nb b =
|
||||
let p = polyphase 2 in
|
||||
let ae = p a 0 and nae = na - na / 2
|
||||
and ao = p a 1 and nao = na / 2
|
||||
and be = p b 0 and nbe = nb - nb / 2
|
||||
and bo = p b 1 and nbo = nb / 2 in
|
||||
let ae = infinite nae ae and ao = infinite nao ao
|
||||
and be = infinite nbe be and bo = infinite nbo bo in
|
||||
let aeo = lift2 (@+) ae ao and naeo = nae
|
||||
and beo = lift2 (@+) be bo and nbeo = nbe in
|
||||
let ee = conv nae ae nbe be
|
||||
and oo = conv nao ao nbo bo
|
||||
and eoeo = conv naeo aeo nbeo beo in
|
||||
|
||||
let q = function
|
||||
0 -> fun i -> (ee i) @+ (oo (i - 1))
|
||||
| 1 -> fun i -> (eoeo i) @- ((ee i) @+ (oo i))
|
||||
| _ -> failwith "karatsuba1" in
|
||||
unpolyphase 2 q
|
||||
|
||||
(* Karatsuba variant 2:
|
||||
(a+bx)(c+dx) = ((a+b)c-b(c-dxx))+x((a+b)c-a(c-d)) *)
|
||||
and karatsuba2 na a nb b =
|
||||
let p = polyphase 2 in
|
||||
let ae = p a 0 and nae = na - na / 2
|
||||
and ao = p a 1 and nao = na / 2
|
||||
and be = p b 0 and nbe = nb - nb / 2
|
||||
and bo = p b 1 and nbo = nb / 2 in
|
||||
let ae = infinite nae ae and ao = infinite nao ao
|
||||
and be = infinite nbe be and bo = infinite nbo bo in
|
||||
|
||||
let c1 = conv nae (lift2 (@+) ae ao) nbe be
|
||||
and c2 = conv nao ao (nbo + 1) (fun i -> be i @- bo (i - 1))
|
||||
and c3 = conv nae ae nbe (lift2 (@-) be bo) in
|
||||
|
||||
let q = function
|
||||
0 -> lift2 (@-) c1 c2
|
||||
| 1 -> lift2 (@-) c1 c3
|
||||
| _ -> failwith "karatsuba2" in
|
||||
unpolyphase 2 q
|
||||
|
||||
and karatsuba na a nb b =
|
||||
let m = na + nb - 1 in
|
||||
if (m < !Magic.karatsuba_min) then
|
||||
recur na a nb b
|
||||
else
|
||||
match !Magic.karatsuba_variant with
|
||||
1 -> karatsuba1 na a nb b
|
||||
| 2 -> karatsuba2 na a nb b
|
||||
| _ -> failwith "unknown karatsuba variant"
|
||||
|
||||
and via_circular na a nb b =
|
||||
let m = na + nb - 1 in
|
||||
if (m < !Magic.circular_min) then
|
||||
karatsuba na a nb b
|
||||
else
|
||||
let rec find_min n = if n >= m then n else find_min (2 * n) in
|
||||
circular (find_min 1) a b
|
||||
|
||||
in
|
||||
let a = infinite na a and b = infinite nb b in
|
||||
let res = array (na + nb - 1) (via_circular na a nb b) in
|
||||
infinite (na + nb - 1) res
|
||||
|
||||
and circular n a b =
|
||||
let via_dft n a b =
|
||||
let fa = Fft.dft (-1) n a
|
||||
and fb = Fft.dft (-1) n b
|
||||
and scale = inverse_int n in
|
||||
let fab i = ((fa i) @* (fb i)) @* scale in
|
||||
Fft.dft 1 n fab
|
||||
|
||||
in via_dft n a b
|
||||
22
fftw-3.3.10/genfft/conv.mli
Normal file
22
fftw-3.3.10/genfft/conv.mli
Normal file
@@ -0,0 +1,22 @@
|
||||
(*
|
||||
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*)
|
||||
|
||||
val conv : int -> Complex.signal -> int -> Complex.signal -> Complex.signal
|
||||
109
fftw-3.3.10/genfft/dag.ml
Normal file
109
fftw-3.3.10/genfft/dag.ml
Normal file
@@ -0,0 +1,109 @@
|
||||
(*
|
||||
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*)
|
||||
|
||||
open Util
|
||||
|
||||
(* Here, we have functions to transform a sequence of assignments
|
||||
(variable = expression) into a DAG (a directed, acyclic graph).
|
||||
The nodes of the DAG are the assignments, and the edges indicate
|
||||
dependencies. (The DAG is analyzed in the scheduler to find an
|
||||
efficient ordering of the assignments.)
|
||||
|
||||
This file also contains utilities to manipulate the DAG in various
|
||||
ways. *)
|
||||
|
||||
(********************************************
|
||||
* Dag structure
|
||||
********************************************)
|
||||
type color = RED | BLUE | BLACK | YELLOW
|
||||
|
||||
type dagnode =
|
||||
{ assigned: Variable.variable;
|
||||
mutable expression: Expr.expr;
|
||||
input_variables: Variable.variable list;
|
||||
mutable successors: dagnode list;
|
||||
mutable predecessors: dagnode list;
|
||||
mutable label: int;
|
||||
mutable color: color}
|
||||
|
||||
type dag = Dag of (dagnode list)
|
||||
|
||||
(* true if node uses v *)
|
||||
let node_uses v node =
|
||||
List.exists (Variable.same v) node.input_variables
|
||||
|
||||
(* true if assignment of v clobbers any input of node *)
|
||||
let node_clobbers node v =
|
||||
List.exists (Variable.same_location v) node.input_variables
|
||||
|
||||
(* true if nodeb depends on nodea *)
|
||||
let depends_on nodea nodeb =
|
||||
node_uses nodea.assigned nodeb ||
|
||||
node_clobbers nodea nodeb.assigned
|
||||
|
||||
(* transform an assignment list into a dag *)
|
||||
let makedag alist =
|
||||
let dag = List.map
|
||||
(fun assignment ->
|
||||
let (v, x) = assignment in
|
||||
{ assigned = v;
|
||||
expression = x;
|
||||
input_variables = Expr.find_vars x;
|
||||
successors = [];
|
||||
predecessors = [];
|
||||
label = 0;
|
||||
color = BLACK })
|
||||
alist
|
||||
in begin
|
||||
for_list dag (fun i ->
|
||||
for_list dag (fun j ->
|
||||
if depends_on i j then begin
|
||||
i.successors <- j :: i.successors;
|
||||
j.predecessors <- i :: j.predecessors;
|
||||
end));
|
||||
Dag dag;
|
||||
end
|
||||
|
||||
let map f (Dag dag) = Dag (List.map f dag)
|
||||
let for_all (Dag dag) f =
|
||||
(* type system loophole *)
|
||||
let make_unit _ = () in
|
||||
make_unit (List.map f dag)
|
||||
let to_list (Dag dag) = dag
|
||||
|
||||
let find_node f (Dag dag) = Util.find_elem f dag
|
||||
|
||||
(* breadth-first search *)
|
||||
let rec bfs (Dag dag) node init_label =
|
||||
let _ = node.label <- init_label in
|
||||
let rec loop = function
|
||||
[] -> ()
|
||||
| node :: rest ->
|
||||
let neighbors = node.predecessors @ node.successors in
|
||||
let m = min_list (List.map (fun node -> node.label) neighbors) in
|
||||
if (node.label > m + 1) then begin
|
||||
node.label <- m + 1;
|
||||
loop (rest @ neighbors);
|
||||
end else
|
||||
loop rest
|
||||
in let neighbors = node.predecessors @ node.successors in
|
||||
loop neighbors
|
||||
|
||||
43
fftw-3.3.10/genfft/dag.mli
Normal file
43
fftw-3.3.10/genfft/dag.mli
Normal file
@@ -0,0 +1,43 @@
|
||||
(*
|
||||
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*)
|
||||
|
||||
open Util
|
||||
|
||||
type color = | RED | BLUE | BLACK | YELLOW
|
||||
|
||||
type dagnode =
|
||||
{ assigned: Variable.variable;
|
||||
mutable expression: Expr.expr;
|
||||
input_variables: Variable.variable list;
|
||||
mutable successors: dagnode list;
|
||||
mutable predecessors: dagnode list;
|
||||
mutable label: int;
|
||||
mutable color: color}
|
||||
|
||||
type dag
|
||||
|
||||
val makedag : (Variable.variable * Expr.expr) list -> dag
|
||||
|
||||
val map : (dagnode -> dagnode) -> dag -> dag
|
||||
val for_all : dag -> (dagnode -> unit) -> unit
|
||||
val to_list : dag -> (dagnode list)
|
||||
val bfs : dag -> dagnode -> int -> unit
|
||||
val find_node : (dagnode -> bool) -> dag -> dagnode option
|
||||
152
fftw-3.3.10/genfft/expr.ml
Normal file
152
fftw-3.3.10/genfft/expr.ml
Normal file
@@ -0,0 +1,152 @@
|
||||
(*
|
||||
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*)
|
||||
|
||||
(* Here, we define the data type encapsulating a symbolic arithmetic
|
||||
expression, and provide some routines for manipulating it. *)
|
||||
|
||||
(* I will regret this hack : *)
|
||||
(* NEWS: I did *)
|
||||
type transcendent = I | MULTI_A | MULTI_B | CONJ
|
||||
|
||||
type expr =
|
||||
| Num of Number.number
|
||||
| NaN of transcendent
|
||||
| Plus of expr list
|
||||
| Times of expr * expr
|
||||
| CTimes of expr * expr
|
||||
| CTimesJ of expr * expr (* CTimesJ (a, b) = conj(a) * b *)
|
||||
| Uminus of expr
|
||||
| Load of Variable.variable
|
||||
| Store of Variable.variable * expr
|
||||
|
||||
type assignment = Assign of Variable.variable * expr
|
||||
|
||||
(* various hash functions *)
|
||||
let hash_float x =
|
||||
let (mantissa, exponent) = frexp x
|
||||
in truncate (float_of_int(exponent) *. 1234.567 +. mantissa *. 10000.0)
|
||||
|
||||
let sum_list l = List.fold_right (+) l 0
|
||||
|
||||
let transcendent_to_float = function
|
||||
| I -> 2.718281828459045235360287471 (* any transcendent number will do *)
|
||||
| MULTI_A -> 0.6931471805599453094172321214
|
||||
| MULTI_B -> -0.3665129205816643270124391582
|
||||
| CONJ -> 0.6019072301972345747375400015
|
||||
|
||||
let rec hash = function
|
||||
| Num x -> hash_float (Number.to_float x)
|
||||
| NaN x -> hash_float (transcendent_to_float x)
|
||||
| Load v -> 1 + 1237 * Variable.hash v
|
||||
| Store (v, x) -> 2 * Variable.hash v - 2345 * hash x
|
||||
| Plus l -> 5 + 23451 * sum_list (List.map Hashtbl.hash l)
|
||||
| Times (a, b) -> 41 + 31415 * (Hashtbl.hash a + Hashtbl.hash b)
|
||||
| CTimes (a, b) -> 49 + 3245 * (Hashtbl.hash a + Hashtbl.hash b)
|
||||
| CTimesJ (a, b) -> 31 + 3471 * (Hashtbl.hash a + Hashtbl.hash b)
|
||||
| Uminus x -> 42 + 12345 * (hash x)
|
||||
|
||||
(* find all variables *)
|
||||
let rec find_vars x =
|
||||
match x with
|
||||
| Load y -> [y]
|
||||
| Plus l -> List.flatten (List.map find_vars l)
|
||||
| Times (a, b) -> (find_vars a) @ (find_vars b)
|
||||
| CTimes (a, b) -> (find_vars a) @ (find_vars b)
|
||||
| CTimesJ (a, b) -> (find_vars a) @ (find_vars b)
|
||||
| Uminus a -> find_vars a
|
||||
| _ -> []
|
||||
|
||||
|
||||
(* TRUE if expression is a constant *)
|
||||
let is_constant = function
|
||||
| Num _ -> true
|
||||
| NaN _ -> true
|
||||
| Load v -> Variable.is_constant v
|
||||
| _ -> false
|
||||
|
||||
let is_known_constant = function
|
||||
| Num _ -> true
|
||||
| NaN _ -> true
|
||||
| _ -> false
|
||||
|
||||
(* expr to string, used for debugging *)
|
||||
let rec foldr_string_concat l =
|
||||
match l with
|
||||
[] -> ""
|
||||
| [a] -> a
|
||||
| a :: b -> a ^ " " ^ (foldr_string_concat b)
|
||||
|
||||
let string_of_transcendent = function
|
||||
| I -> "I"
|
||||
| MULTI_A -> "MULTI_A"
|
||||
| MULTI_B -> "MULTI_B"
|
||||
| CONJ -> "CONJ"
|
||||
|
||||
let rec to_string = function
|
||||
| Load v -> Variable.unparse v
|
||||
| Num n -> string_of_float (Number.to_float n)
|
||||
| NaN n -> string_of_transcendent n
|
||||
| Plus x -> "(+ " ^ (foldr_string_concat (List.map to_string x)) ^ ")"
|
||||
| Times (a, b) -> "(* " ^ (to_string a) ^ " " ^ (to_string b) ^ ")"
|
||||
| CTimes (a, b) -> "(c* " ^ (to_string a) ^ " " ^ (to_string b) ^ ")"
|
||||
| CTimesJ (a, b) -> "(cj* " ^ (to_string a) ^ " " ^ (to_string b) ^ ")"
|
||||
| Uminus a -> "(- " ^ (to_string a) ^ ")"
|
||||
| Store (v, a) -> "(:= " ^ (Variable.unparse v) ^ " " ^
|
||||
(to_string a) ^ ")"
|
||||
|
||||
let rec to_string_a d x =
|
||||
if (d = 0) then "..." else match x with
|
||||
| Load v -> Variable.unparse v
|
||||
| Num n -> Number.to_konst n
|
||||
| NaN n -> string_of_transcendent n
|
||||
| Plus x -> "(+ " ^ (foldr_string_concat (List.map (to_string_a (d - 1)) x)) ^ ")"
|
||||
| Times (a, b) -> "(* " ^ (to_string_a (d - 1) a) ^ " " ^ (to_string_a (d - 1) b) ^ ")"
|
||||
| CTimes (a, b) -> "(c* " ^ (to_string_a (d - 1) a) ^ " " ^ (to_string_a (d - 1) b) ^ ")"
|
||||
| CTimesJ (a, b) -> "(cj* " ^ (to_string_a (d - 1) a) ^ " " ^ (to_string_a (d - 1) b) ^ ")"
|
||||
| Uminus a -> "(- " ^ (to_string_a (d-1) a) ^ ")"
|
||||
| Store (v, a) -> "(:= " ^ (Variable.unparse v) ^ " " ^
|
||||
(to_string_a (d-1) a) ^ ")"
|
||||
|
||||
let to_string = to_string_a 10
|
||||
|
||||
let assignment_to_string = function
|
||||
| Assign (v, a) -> "(:= " ^ (Variable.unparse v) ^ " " ^ (to_string a) ^ ")"
|
||||
|
||||
let dump print = List.iter (fun x -> print ((assignment_to_string x) ^ "\n"))
|
||||
|
||||
(* find all constants in a given expression *)
|
||||
let rec expr_to_constants = function
|
||||
| Num n -> [n]
|
||||
| Plus a -> List.flatten (List.map expr_to_constants a)
|
||||
| Times (a, b) -> (expr_to_constants a) @ (expr_to_constants b)
|
||||
| CTimes (a, b) -> (expr_to_constants a) @ (expr_to_constants b)
|
||||
| CTimesJ (a, b) -> (expr_to_constants a) @ (expr_to_constants b)
|
||||
| Uminus a -> expr_to_constants a
|
||||
| _ -> []
|
||||
|
||||
|
||||
let add_float_key_value list_so_far k =
|
||||
if List.exists (fun k2 -> Number.equal k k2) list_so_far then
|
||||
list_so_far
|
||||
else
|
||||
k :: list_so_far
|
||||
|
||||
let unique_constants = List.fold_left add_float_key_value []
|
||||
51
fftw-3.3.10/genfft/expr.mli
Normal file
51
fftw-3.3.10/genfft/expr.mli
Normal file
@@ -0,0 +1,51 @@
|
||||
(*
|
||||
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*)
|
||||
|
||||
type transcendent = I | MULTI_A | MULTI_B | CONJ
|
||||
|
||||
type expr =
|
||||
| Num of Number.number
|
||||
| NaN of transcendent
|
||||
| Plus of expr list
|
||||
| Times of expr * expr
|
||||
| CTimes of expr * expr
|
||||
| CTimesJ of expr * expr
|
||||
| Uminus of expr
|
||||
| Load of Variable.variable
|
||||
| Store of Variable.variable * expr
|
||||
|
||||
type assignment = Assign of Variable.variable * expr
|
||||
|
||||
val hash_float : float -> int
|
||||
val hash : expr -> int
|
||||
val to_string : expr -> string
|
||||
val assignment_to_string : assignment -> string
|
||||
val transcendent_to_float : transcendent -> float
|
||||
val string_of_transcendent : transcendent -> string
|
||||
|
||||
val find_vars : expr -> Variable.variable list
|
||||
val is_constant : expr -> bool
|
||||
val is_known_constant : expr -> bool
|
||||
|
||||
val dump : (string -> unit) -> assignment list -> unit
|
||||
|
||||
val expr_to_constants : expr -> Number.number list
|
||||
val unique_constants : Number.number list -> Number.number list
|
||||
307
fftw-3.3.10/genfft/fft.ml
Normal file
307
fftw-3.3.10/genfft/fft.ml
Normal file
@@ -0,0 +1,307 @@
|
||||
(*
|
||||
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*)
|
||||
|
||||
|
||||
(* This is the part of the generator that actually computes the FFT
|
||||
in symbolic form *)
|
||||
|
||||
open Complex
|
||||
open Util
|
||||
|
||||
(* choose a suitable factor of n *)
|
||||
let choose_factor n =
|
||||
(* first choice: i such that gcd(i, n / i) = 1, i as big as possible *)
|
||||
let choose1 n =
|
||||
let rec loop i f =
|
||||
if (i * i > n) then f
|
||||
else if ((n mod i) == 0 && gcd i (n / i) == 1) then loop (i + 1) i
|
||||
else loop (i + 1) f
|
||||
in loop 1 1
|
||||
|
||||
(* second choice: the biggest factor i of n, where i < sqrt(n), if any *)
|
||||
and choose2 n =
|
||||
let rec loop i f =
|
||||
if (i * i > n) then f
|
||||
else if ((n mod i) == 0) then loop (i + 1) i
|
||||
else loop (i + 1) f
|
||||
in loop 1 1
|
||||
|
||||
in let i = choose1 n in
|
||||
if (i > 1) then i
|
||||
else choose2 n
|
||||
|
||||
let is_power_of_two n = (n > 0) && ((n - 1) land n == 0)
|
||||
|
||||
let rec dft_prime sign n input =
|
||||
let sum filter i =
|
||||
sigma 0 n (fun j ->
|
||||
let coeff = filter (exp n (sign * i * j))
|
||||
in coeff @* (input j)) in
|
||||
let computation_even = array n (sum identity)
|
||||
and computation_odd =
|
||||
let sumr = array n (sum real)
|
||||
and sumi = array n (sum ((times Complex.i) @@ imag)) in
|
||||
array n (fun i ->
|
||||
if (i = 0) then
|
||||
(* expose some common subexpressions *)
|
||||
input 0 @+
|
||||
sigma 1 ((n + 1) / 2) (fun j -> input j @+ input (n - j))
|
||||
else
|
||||
let i' = min i (n - i) in
|
||||
if (i < n - i) then
|
||||
sumr i' @+ sumi i'
|
||||
else
|
||||
sumr i' @- sumi i') in
|
||||
if (n >= !Magic.rader_min) then
|
||||
dft_rader sign n input
|
||||
else if (n == 2) then
|
||||
computation_even
|
||||
else
|
||||
computation_odd
|
||||
|
||||
|
||||
and dft_rader sign p input =
|
||||
let half =
|
||||
let one_half = inverse_int 2 in
|
||||
times one_half
|
||||
|
||||
and make_product n a b =
|
||||
let scale_factor = inverse_int n in
|
||||
array n (fun i -> a i @* (scale_factor @* b i)) in
|
||||
|
||||
(* generates a convolution using ffts. (all arguments are the
|
||||
same as to gen_convolution, below) *)
|
||||
let gen_convolution_by_fft n a b addtoall =
|
||||
let fft_a = dft 1 n a
|
||||
and fft_b = dft 1 n b in
|
||||
|
||||
let fft_ab = make_product n fft_a fft_b
|
||||
and dc_term i = if (i == 0) then addtoall else zero in
|
||||
|
||||
let fft_ab1 = array n (fun i -> fft_ab i @+ dc_term i)
|
||||
and sum = fft_a 0 in
|
||||
let conv = dft (-1) n fft_ab1 in
|
||||
(sum, conv)
|
||||
|
||||
(* alternate routine for convolution. Seems to work better for
|
||||
small sizes. I have no idea why. *)
|
||||
and gen_convolution_by_fft_alt n a b addtoall =
|
||||
let ap = array n (fun i -> half (a i @+ a ((n - i) mod n)))
|
||||
and am = array n (fun i -> half (a i @- a ((n - i) mod n)))
|
||||
and bp = array n (fun i -> half (b i @+ b ((n - i) mod n)))
|
||||
and bm = array n (fun i -> half (b i @- b ((n - i) mod n)))
|
||||
in
|
||||
|
||||
let fft_ap = dft 1 n ap
|
||||
and fft_am = dft 1 n am
|
||||
and fft_bp = dft 1 n bp
|
||||
and fft_bm = dft 1 n bm in
|
||||
|
||||
let fft_abpp = make_product n fft_ap fft_bp
|
||||
and fft_abpm = make_product n fft_ap fft_bm
|
||||
and fft_abmp = make_product n fft_am fft_bp
|
||||
and fft_abmm = make_product n fft_am fft_bm
|
||||
and sum = fft_ap 0 @+ fft_am 0
|
||||
and dc_term i = if (i == 0) then addtoall else zero in
|
||||
|
||||
let fft_ab1 = array n (fun i -> (fft_abpp i @+ fft_abmm i) @+ dc_term i)
|
||||
and fft_ab2 = array n (fun i -> fft_abpm i @+ fft_abmp i) in
|
||||
let conv1 = dft (-1) n fft_ab1
|
||||
and conv2 = dft (-1) n fft_ab2 in
|
||||
let conv = array n (fun i ->
|
||||
conv1 i @+ conv2 i) in
|
||||
(sum, conv)
|
||||
|
||||
(* generator of assignment list assigning conv to the convolution of
|
||||
a and b, all of which are of length n. addtoall is added to
|
||||
all of the elements of the result. Returns (sum, convolution) pair
|
||||
where sum is the sum of the elements of a. *)
|
||||
|
||||
in let gen_convolution =
|
||||
if (p <= !Magic.alternate_convolution) then
|
||||
gen_convolution_by_fft_alt
|
||||
else
|
||||
gen_convolution_by_fft
|
||||
|
||||
(* fft generator for prime n = p using Rader's algorithm for
|
||||
turning the fft into a convolution, which then can be
|
||||
performed in a variety of ways *)
|
||||
in
|
||||
let g = find_generator p in
|
||||
let ginv = pow_mod g (p - 2) p in
|
||||
let input_perm = array p (fun i -> input (pow_mod g i p))
|
||||
and omega_perm = array p (fun i -> exp p (sign * (pow_mod ginv i p)))
|
||||
and output_perm = array p (fun i -> pow_mod ginv i p)
|
||||
in let (sum, conv) =
|
||||
(gen_convolution (p - 1) input_perm omega_perm (input 0))
|
||||
in array p (fun i ->
|
||||
if (i = 0) then
|
||||
input 0 @+ sum
|
||||
else
|
||||
let i' = suchthat 0 (fun i' -> i = output_perm i')
|
||||
in conv i')
|
||||
|
||||
(* our modified version of the conjugate-pair split-radix algorithm,
|
||||
which reduces the number of multiplications by rescaling the
|
||||
sub-transforms (power-of-two n's only) *)
|
||||
and newsplit sign n input =
|
||||
let rec s n k = (* recursive scale factor *)
|
||||
if n <= 4 then
|
||||
one
|
||||
else
|
||||
let k4 = (abs k) mod (n / 4) in
|
||||
let k4' = if k4 <= (n / 8) then k4 else (n/4 - k4) in
|
||||
(s (n / 4) k4') @* (real (exp n k4'))
|
||||
|
||||
and sinv n k = (* 1 / s(n,k) *)
|
||||
if n <= 4 then
|
||||
one
|
||||
else
|
||||
let k4 = (abs k) mod (n / 4) in
|
||||
let k4' = if k4 <= (n / 8) then k4 else (n/4 - k4) in
|
||||
(sinv (n / 4) k4') @* (sec n k4')
|
||||
|
||||
in let sdiv2 n k = (s n k) @* (sinv (2*n) k) (* s(n,k) / s(2*n,k) *)
|
||||
and sdiv4 n k = (* s(n,k) / s(4*n,k) *)
|
||||
let k4 = (abs k) mod n in
|
||||
sec (4*n) (if k4 <= (n / 2) then k4 else (n - k4))
|
||||
|
||||
in let t n k = (exp n k) @* (sdiv4 (n/4) k)
|
||||
|
||||
and dft1 input = input
|
||||
and dft2 input = array 2 (fun k -> (input 0) @+ ((input 1) @* exp 2 k))
|
||||
|
||||
in let rec newsplit0 sign n input =
|
||||
if (n == 1) then dft1 input
|
||||
else if (n == 2) then dft2 input
|
||||
else let u = newsplit0 sign (n / 2) (fun i -> input (i*2))
|
||||
and z = newsplitS sign (n / 4) (fun i -> input (i*4 + 1))
|
||||
and z' = newsplitS sign (n / 4) (fun i -> input ((n + i*4 - 1) mod n))
|
||||
and twid = array n (fun k -> s (n/4) k @* exp n (sign * k)) in
|
||||
let w = array n (fun k -> twid k @* z (k mod (n / 4)))
|
||||
and w' = array n (fun k -> conj (twid k) @* z' (k mod (n / 4))) in
|
||||
let ww = array n (fun k -> w k @+ w' k) in
|
||||
array n (fun k -> u (k mod (n / 2)) @+ ww k)
|
||||
|
||||
and newsplitS sign n input =
|
||||
if (n == 1) then dft1 input
|
||||
else if (n == 2) then dft2 input
|
||||
else let u = newsplitS2 sign (n / 2) (fun i -> input (i*2))
|
||||
and z = newsplitS sign (n / 4) (fun i -> input (i*4 + 1))
|
||||
and z' = newsplitS sign (n / 4) (fun i -> input ((n + i*4 - 1) mod n)) in
|
||||
let w = array n (fun k -> t n (sign * k) @* z (k mod (n / 4)))
|
||||
and w' = array n (fun k -> conj (t n (sign * k)) @* z' (k mod (n / 4))) in
|
||||
let ww = array n (fun k -> w k @+ w' k) in
|
||||
array n (fun k -> u (k mod (n / 2)) @+ ww k)
|
||||
|
||||
and newsplitS2 sign n input =
|
||||
if (n == 1) then dft1 input
|
||||
else if (n == 2) then dft2 input
|
||||
else let u = newsplitS4 sign (n / 2) (fun i -> input (i*2))
|
||||
and z = newsplitS sign (n / 4) (fun i -> input (i*4 + 1))
|
||||
and z' = newsplitS sign (n / 4) (fun i -> input ((n + i*4 - 1) mod n)) in
|
||||
let w = array n (fun k -> t n (sign * k) @* z (k mod (n / 4)))
|
||||
and w' = array n (fun k -> conj (t n (sign * k)) @* z' (k mod (n / 4))) in
|
||||
let ww = array n (fun k -> (w k @+ w' k) @* (sdiv2 n k)) in
|
||||
array n (fun k -> u (k mod (n / 2)) @+ ww k)
|
||||
|
||||
and newsplitS4 sign n input =
|
||||
if (n == 1) then dft1 input
|
||||
else if (n == 2) then
|
||||
let f = dft2 input
|
||||
in array 2 (fun k -> (f k) @* (sinv 8 k))
|
||||
else let u = newsplitS2 sign (n / 2) (fun i -> input (i*2))
|
||||
and z = newsplitS sign (n / 4) (fun i -> input (i*4 + 1))
|
||||
and z' = newsplitS sign (n / 4) (fun i -> input ((n + i*4 - 1) mod n)) in
|
||||
let w = array n (fun k -> t n (sign * k) @* z (k mod (n / 4)))
|
||||
and w' = array n (fun k -> conj (t n (sign * k)) @* z' (k mod (n / 4))) in
|
||||
let ww = array n (fun k -> w k @+ w' k) in
|
||||
array n (fun k -> (u (k mod (n / 2)) @+ ww k) @* (sdiv4 n k))
|
||||
|
||||
in newsplit0 sign n input
|
||||
|
||||
and dft sign n input =
|
||||
let rec cooley_tukey sign n1 n2 input =
|
||||
let tmp1 =
|
||||
array n2 (fun i2 ->
|
||||
dft sign n1 (fun i1 -> input (i1 * n2 + i2))) in
|
||||
let tmp2 =
|
||||
array n1 (fun i1 ->
|
||||
array n2 (fun i2 ->
|
||||
exp n (sign * i1 * i2) @* tmp1 i2 i1)) in
|
||||
let tmp3 = array n1 (fun i1 -> dft sign n2 (tmp2 i1)) in
|
||||
(fun i -> tmp3 (i mod n1) (i / n1))
|
||||
|
||||
(*
|
||||
* This is "exponent -1" split-radix by Dan Bernstein.
|
||||
*)
|
||||
and split_radix_dit sign n input =
|
||||
let f0 = dft sign (n / 2) (fun i -> input (i * 2))
|
||||
and f10 = dft sign (n / 4) (fun i -> input (i * 4 + 1))
|
||||
and f11 = dft sign (n / 4) (fun i -> input ((n + i * 4 - 1) mod n)) in
|
||||
let g10 = array n (fun k ->
|
||||
exp n (sign * k) @* f10 (k mod (n / 4)))
|
||||
and g11 = array n (fun k ->
|
||||
exp n (- sign * k) @* f11 (k mod (n / 4))) in
|
||||
let g1 = array n (fun k -> g10 k @+ g11 k) in
|
||||
array n (fun k -> f0 (k mod (n / 2)) @+ g1 k)
|
||||
|
||||
and split_radix_dif sign n input =
|
||||
let n2 = n / 2 and n4 = n / 4 in
|
||||
let x0 = array n2 (fun i -> input i @+ input (i + n2))
|
||||
and x10 = array n4 (fun i -> input i @- input (i + n2))
|
||||
and x11 = array n4 (fun i ->
|
||||
input (i + n4) @- input (i + n2 + n4)) in
|
||||
let x1 k i =
|
||||
exp n (k * i * sign) @* (x10 i @+ exp 4 (k * sign) @* x11 i) in
|
||||
let f0 = dft sign n2 x0
|
||||
and f1 = array 4 (fun k -> dft sign n4 (x1 k)) in
|
||||
array n (fun k ->
|
||||
if k mod 2 = 0 then f0 (k / 2)
|
||||
else let k' = k mod 4 in f1 k' ((k - k') / 4))
|
||||
|
||||
and prime_factor sign n1 n2 input =
|
||||
let tmp1 = array n2 (fun i2 ->
|
||||
dft sign n1 (fun i1 -> input ((i1 * n2 + i2 * n1) mod n)))
|
||||
in let tmp2 = array n1 (fun i1 ->
|
||||
dft sign n2 (fun k2 -> tmp1 k2 i1))
|
||||
in fun i -> tmp2 (i mod n1) (i mod n2)
|
||||
|
||||
in let algorithm sign n =
|
||||
let r = choose_factor n in
|
||||
if List.mem n !Magic.rader_list then
|
||||
(* special cases *)
|
||||
dft_rader sign n
|
||||
else if (r == 1) then (* n is prime *)
|
||||
dft_prime sign n
|
||||
else if (gcd r (n / r)) == 1 then
|
||||
prime_factor sign r (n / r)
|
||||
else if (n mod 4 = 0 && n > 4) then
|
||||
if !Magic.newsplit && is_power_of_two n then
|
||||
newsplit sign n
|
||||
else if !Magic.dif_split_radix then
|
||||
split_radix_dif sign n
|
||||
else
|
||||
split_radix_dit sign n
|
||||
else
|
||||
cooley_tukey sign r (n / r)
|
||||
in
|
||||
array n (algorithm sign n input)
|
||||
22
fftw-3.3.10/genfft/fft.mli
Normal file
22
fftw-3.3.10/genfft/fft.mli
Normal file
@@ -0,0 +1,22 @@
|
||||
(*
|
||||
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*)
|
||||
|
||||
val dft : int -> int -> Complex.signal -> Complex.signal
|
||||
186
fftw-3.3.10/genfft/gen_hc2c.ml
Normal file
186
fftw-3.3.10/genfft/gen_hc2c.ml
Normal file
@@ -0,0 +1,186 @@
|
||||
(*
|
||||
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*)
|
||||
|
||||
open Util
|
||||
open Genutil
|
||||
open C
|
||||
|
||||
|
||||
type ditdif = DIT | DIF
|
||||
let ditdif = ref DIT
|
||||
let usage = "Usage: " ^ Sys.argv.(0) ^ " -n <number> [ -dit | -dif ]"
|
||||
|
||||
let urs = ref Stride_variable
|
||||
|
||||
let speclist = [
|
||||
"-dit",
|
||||
Arg.Unit(fun () -> ditdif := DIT),
|
||||
" generate a DIT codelet";
|
||||
|
||||
"-dif",
|
||||
Arg.Unit(fun () -> ditdif := DIF),
|
||||
" generate a DIF codelet";
|
||||
|
||||
"-with-rs",
|
||||
Arg.String(fun x -> urs := arg_to_stride x),
|
||||
" specialize for given R-stride";
|
||||
]
|
||||
|
||||
let byi = Complex.times Complex.i
|
||||
let byui = Complex.times (Complex.uminus Complex.i)
|
||||
|
||||
let sym n f i = if (i < n - i) then f i else Complex.conj (f i)
|
||||
|
||||
let shuffle_eo fe fo i = if i mod 2 == 0 then fe (i/2) else fo ((i-1)/2)
|
||||
|
||||
let generate n =
|
||||
let rs = "rs"
|
||||
and twarray = "W"
|
||||
and m = "m" and mb = "mb" and me = "me" and ms = "ms"
|
||||
|
||||
(* the array names are from the point of view of the complex array
|
||||
(output in R2C, input in C2R) *)
|
||||
and arp = "Rp" (* real, positive *)
|
||||
and aip = "Ip" (* imag, positive *)
|
||||
and arm = "Rm" (* real, negative *)
|
||||
and aim = "Im" (* imag, negative *)
|
||||
|
||||
in
|
||||
|
||||
let sign = !Genutil.sign
|
||||
and name = !Magic.codelet_name
|
||||
and byvl x = choose_simd x (ctimes (CVar "VL", x)) in
|
||||
|
||||
let (bytwiddle, num_twiddles, twdesc) = Twiddle.twiddle_policy 1 false in
|
||||
let nt = num_twiddles n in
|
||||
|
||||
let byw = bytwiddle n sign (twiddle_array nt twarray) in
|
||||
|
||||
let vrs = either_stride (!urs) (C.SVar rs) in
|
||||
|
||||
(* assume a single location. No point in doing alias analysis *)
|
||||
let the_location = (Unique.make (), Unique.make ()) in
|
||||
let locations _ = the_location in
|
||||
|
||||
let locr = (locative_array_c n
|
||||
(C.array_subscript arp vrs)
|
||||
(C.array_subscript arm vrs)
|
||||
locations "BUG")
|
||||
and loci = (locative_array_c n
|
||||
(C.array_subscript aip vrs)
|
||||
(C.array_subscript aim vrs)
|
||||
locations "BUG")
|
||||
and locp = (locative_array_c n
|
||||
(C.array_subscript arp vrs)
|
||||
(C.array_subscript aip vrs)
|
||||
locations "BUG")
|
||||
and locm = (locative_array_c n
|
||||
(C.array_subscript arm vrs)
|
||||
(C.array_subscript aim vrs)
|
||||
locations "BUG")
|
||||
in
|
||||
let locri i = if i mod 2 == 0 then locr (i/2) else loci ((i-1)/2)
|
||||
and locpm i = if i < n - i then locp i else locm (n-1-i)
|
||||
in
|
||||
|
||||
let asch =
|
||||
match !ditdif with
|
||||
| DIT ->
|
||||
let output = Fft.dft sign n (byw (load_array_c n locri)) in
|
||||
let odag = store_array_c n locpm (sym n output) in
|
||||
standard_optimizer odag
|
||||
|
||||
| DIF ->
|
||||
let output = byw (Fft.dft sign n (sym n (load_array_c n locpm))) in
|
||||
let odag = store_array_c n locri output in
|
||||
standard_optimizer odag
|
||||
in
|
||||
|
||||
let vms = CVar "ms"
|
||||
and varp = CVar arp
|
||||
and vaip = CVar aip
|
||||
and varm = CVar arm
|
||||
and vaim = CVar aim
|
||||
and vm = CVar m and vmb = CVar mb and vme = CVar me
|
||||
in
|
||||
let body = Block (
|
||||
[Decl ("INT", m)],
|
||||
[For (list_to_comma
|
||||
[Expr_assign (vm, vmb);
|
||||
Expr_assign (CVar twarray,
|
||||
CPlus [CVar twarray;
|
||||
ctimes (CPlus [vmb; CUminus (Integer 1)],
|
||||
Integer nt)])],
|
||||
Binop (" < ", vm, vme),
|
||||
list_to_comma
|
||||
[Expr_assign (vm, CPlus [vm; byvl (Integer 1)]);
|
||||
Expr_assign (varp, CPlus [varp; byvl vms]);
|
||||
Expr_assign (vaip, CPlus [vaip; byvl vms]);
|
||||
Expr_assign (varm, CPlus [varm; CUminus (byvl vms)]);
|
||||
Expr_assign (vaim, CPlus [vaim; CUminus (byvl vms)]);
|
||||
Expr_assign (CVar twarray, CPlus [CVar twarray;
|
||||
byvl (Integer nt)]);
|
||||
make_volatile_stride (4*n) (CVar rs)
|
||||
],
|
||||
Asch asch)])
|
||||
in
|
||||
|
||||
let tree =
|
||||
Fcn ("static void", name,
|
||||
[Decl (C.realtypep, arp);
|
||||
Decl (C.realtypep, aip);
|
||||
Decl (C.realtypep, arm);
|
||||
Decl (C.realtypep, aim);
|
||||
Decl (C.constrealtypep, twarray);
|
||||
Decl (C.stridetype, rs);
|
||||
Decl ("INT", mb);
|
||||
Decl ("INT", me);
|
||||
Decl ("INT", ms)],
|
||||
finalize_fcn body)
|
||||
in
|
||||
let twinstr =
|
||||
Printf.sprintf "static const tw_instr twinstr[] = %s;\n\n"
|
||||
(twinstr_to_string "VL" (twdesc n))
|
||||
and desc =
|
||||
Printf.sprintf
|
||||
"static const hc2c_desc desc = {%d, \"%s\", twinstr, &GENUS, %s};\n\n"
|
||||
n name (flops_of tree)
|
||||
and register = "X(khc2c_register)"
|
||||
|
||||
in
|
||||
let init =
|
||||
"\n" ^
|
||||
twinstr ^
|
||||
desc ^
|
||||
(declare_register_fcn name) ^
|
||||
(Printf.sprintf "{\n%s(p, %s, &desc, HC2C_VIA_RDFT);\n}" register name)
|
||||
in
|
||||
|
||||
(unparse tree) ^ "\n" ^ init
|
||||
|
||||
|
||||
let main () =
|
||||
begin
|
||||
parse (speclist @ Twiddle.speclist) usage;
|
||||
print_string (generate (check_size ()));
|
||||
end
|
||||
|
||||
let _ = main()
|
||||
208
fftw-3.3.10/genfft/gen_hc2cdft.ml
Normal file
208
fftw-3.3.10/genfft/gen_hc2cdft.ml
Normal file
@@ -0,0 +1,208 @@
|
||||
(*
|
||||
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*)
|
||||
|
||||
open Util
|
||||
open Genutil
|
||||
open C
|
||||
|
||||
|
||||
type ditdif = DIT | DIF
|
||||
let ditdif = ref DIT
|
||||
let usage = "Usage: " ^ Sys.argv.(0) ^ " -n <number> [ -dit | -dif ]"
|
||||
|
||||
let urs = ref Stride_variable
|
||||
let ums = ref Stride_variable
|
||||
|
||||
let speclist = [
|
||||
"-dit",
|
||||
Arg.Unit(fun () -> ditdif := DIT),
|
||||
" generate a DIT codelet";
|
||||
|
||||
"-dif",
|
||||
Arg.Unit(fun () -> ditdif := DIF),
|
||||
" generate a DIF codelet";
|
||||
|
||||
"-with-rs",
|
||||
Arg.String(fun x -> urs := arg_to_stride x),
|
||||
" specialize for given R-stride";
|
||||
|
||||
"-with-ms",
|
||||
Arg.String(fun x -> ums := arg_to_stride x),
|
||||
" specialize for given ms"
|
||||
]
|
||||
|
||||
let byi = Complex.times Complex.i
|
||||
let byui = Complex.times (Complex.uminus Complex.i)
|
||||
|
||||
let shuffle_eo fe fo i = if i mod 2 == 0 then fe (i/2) else fo ((i-1)/2)
|
||||
|
||||
let generate n =
|
||||
let rs = "rs"
|
||||
and twarray = "W"
|
||||
and m = "m" and mb = "mb" and me = "me" and ms = "ms"
|
||||
|
||||
(* the array names are from the point of view of the complex array
|
||||
(output in R2C, input in C2R) *)
|
||||
and arp = "Rp" (* real, positive *)
|
||||
and aip = "Ip" (* imag, positive *)
|
||||
and arm = "Rm" (* real, negative *)
|
||||
and aim = "Im" (* imag, negative *)
|
||||
|
||||
in
|
||||
|
||||
let sign = !Genutil.sign
|
||||
and name = !Magic.codelet_name
|
||||
and byvl x = choose_simd x (ctimes (CVar "VL", x)) in
|
||||
|
||||
let (bytwiddle, num_twiddles, twdesc) = Twiddle.twiddle_policy 1 false in
|
||||
let nt = num_twiddles n in
|
||||
|
||||
let byw = bytwiddle n sign (twiddle_array nt twarray) in
|
||||
|
||||
let vrs = either_stride (!urs) (C.SVar rs) in
|
||||
|
||||
(* assume a single location. No point in doing alias analysis *)
|
||||
let the_location = (Unique.make (), Unique.make ()) in
|
||||
let locations _ = the_location in
|
||||
|
||||
let rlocp = (locative_array_c n
|
||||
(C.array_subscript arp vrs)
|
||||
(C.array_subscript aip vrs)
|
||||
locations "BUG")
|
||||
and rlocm = (locative_array_c n
|
||||
(C.array_subscript arm vrs)
|
||||
(C.array_subscript aim vrs)
|
||||
locations "BUG")
|
||||
and clocp = (locative_array_c n
|
||||
(C.array_subscript arp vrs)
|
||||
(C.array_subscript aip vrs)
|
||||
locations "BUG")
|
||||
and clocm = (locative_array_c n
|
||||
(C.array_subscript arm vrs)
|
||||
(C.array_subscript aim vrs)
|
||||
locations "BUG")
|
||||
in
|
||||
let rloc i = if i mod 2 == 0 then rlocp (i/2) else rlocm ((i-1)/2)
|
||||
and cloc i = if i < n - i then clocp i else clocm (n-1-i)
|
||||
and sym n f i = if (i < n - i) then f i else Complex.conj (f i)
|
||||
and sym1 f i =
|
||||
if i mod 2 == 0 then
|
||||
Complex.plus [f i; Complex.conj (f (i+1))]
|
||||
else
|
||||
Complex.times (Complex.uminus Complex.i)
|
||||
(Complex.plus [f (i-1); Complex.uminus (Complex.conj (f i))])
|
||||
and sym1i f i =
|
||||
if i mod 2 == 0 then
|
||||
Complex.plus [f i; Complex.times Complex.i (f (i+1))]
|
||||
else
|
||||
Complex.conj
|
||||
(Complex.plus [f (i-1);
|
||||
Complex.times (Complex.uminus Complex.i) (f i)])
|
||||
in
|
||||
|
||||
let asch =
|
||||
match !ditdif with
|
||||
| DIT ->
|
||||
let output =
|
||||
(Complex.times Complex.half) @@
|
||||
(Fft.dft sign n (byw (sym1 (load_array_c n rloc)))) in
|
||||
let odag = store_array_c n cloc (sym n output) in
|
||||
standard_optimizer odag
|
||||
|
||||
| DIF ->
|
||||
let output =
|
||||
byw (Fft.dft sign n (sym n (load_array_c n cloc)))
|
||||
in
|
||||
let odag = store_array_c n rloc (sym1i output) in
|
||||
standard_optimizer odag
|
||||
in
|
||||
|
||||
let vms = CVar "ms"
|
||||
and varp = CVar arp
|
||||
and vaip = CVar aip
|
||||
and varm = CVar arm
|
||||
and vaim = CVar aim
|
||||
and vm = CVar m and vmb = CVar mb and vme = CVar me
|
||||
in
|
||||
let body = Block (
|
||||
[Decl ("INT", m)],
|
||||
[For (list_to_comma
|
||||
[Expr_assign (vm, vmb);
|
||||
Expr_assign (CVar twarray,
|
||||
CPlus [CVar twarray;
|
||||
ctimes (CPlus [vmb; CUminus (Integer 1)],
|
||||
Integer nt)])],
|
||||
Binop (" < ", vm, vme),
|
||||
list_to_comma
|
||||
[Expr_assign (vm, CPlus [vm; byvl (Integer 1)]);
|
||||
Expr_assign (varp, CPlus [varp; byvl vms]);
|
||||
Expr_assign (vaip, CPlus [vaip; byvl vms]);
|
||||
Expr_assign (varm, CPlus [varm; CUminus (byvl vms)]);
|
||||
Expr_assign (vaim, CPlus [vaim; CUminus (byvl vms)]);
|
||||
Expr_assign (CVar twarray, CPlus [CVar twarray;
|
||||
byvl (Integer nt)]);
|
||||
make_volatile_stride (4*n) (CVar rs)
|
||||
],
|
||||
Asch asch)]
|
||||
)
|
||||
in
|
||||
|
||||
let tree =
|
||||
Fcn ("static void", name,
|
||||
[Decl (C.realtypep, arp);
|
||||
Decl (C.realtypep, aip);
|
||||
Decl (C.realtypep, arm);
|
||||
Decl (C.realtypep, aim);
|
||||
Decl (C.constrealtypep, twarray);
|
||||
Decl (C.stridetype, rs);
|
||||
Decl ("INT", mb);
|
||||
Decl ("INT", me);
|
||||
Decl ("INT", ms)],
|
||||
finalize_fcn body)
|
||||
in
|
||||
let twinstr =
|
||||
Printf.sprintf "static const tw_instr twinstr[] = %s;\n\n"
|
||||
(twinstr_to_string "VL" (twdesc n))
|
||||
and desc =
|
||||
Printf.sprintf
|
||||
"static const hc2c_desc desc = {%d, \"%s\", twinstr, &GENUS, %s};\n\n"
|
||||
n name (flops_of tree)
|
||||
and register = "X(khc2c_register)"
|
||||
|
||||
in
|
||||
let init =
|
||||
"\n" ^
|
||||
twinstr ^
|
||||
desc ^
|
||||
(declare_register_fcn name) ^
|
||||
(Printf.sprintf "{\n%s(p, %s, &desc, HC2C_VIA_DFT);\n}" register name)
|
||||
in
|
||||
|
||||
(unparse tree) ^ "\n" ^ init
|
||||
|
||||
|
||||
let main () =
|
||||
begin
|
||||
parse (speclist @ Twiddle.speclist) usage;
|
||||
print_string (generate (check_size ()));
|
||||
end
|
||||
|
||||
let _ = main()
|
||||
221
fftw-3.3.10/genfft/gen_hc2cdft_c.ml
Normal file
221
fftw-3.3.10/genfft/gen_hc2cdft_c.ml
Normal file
@@ -0,0 +1,221 @@
|
||||
(*
|
||||
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*)
|
||||
|
||||
open Util
|
||||
open Genutil
|
||||
open C
|
||||
|
||||
|
||||
type ditdif = DIT | DIF
|
||||
let ditdif = ref DIT
|
||||
let usage = "Usage: " ^ Sys.argv.(0) ^ " -n <number> [ -dit | -dif ]"
|
||||
|
||||
let urs = ref Stride_variable
|
||||
let ums = ref Stride_variable
|
||||
|
||||
let speclist = [
|
||||
"-dit",
|
||||
Arg.Unit(fun () -> ditdif := DIT),
|
||||
" generate a DIT codelet";
|
||||
|
||||
"-dif",
|
||||
Arg.Unit(fun () -> ditdif := DIF),
|
||||
" generate a DIF codelet";
|
||||
|
||||
"-with-rs",
|
||||
Arg.String(fun x -> urs := arg_to_stride x),
|
||||
" specialize for given R-stride";
|
||||
|
||||
"-with-ms",
|
||||
Arg.String(fun x -> ums := arg_to_stride x),
|
||||
" specialize for given ms"
|
||||
]
|
||||
|
||||
let byi = Complex.times Complex.i
|
||||
let byui = Complex.times (Complex.uminus Complex.i)
|
||||
|
||||
let shuffle_eo fe fo i = if i mod 2 == 0 then fe (i/2) else fo ((i-1)/2)
|
||||
|
||||
let generate n =
|
||||
let rs = "rs"
|
||||
and twarray = "W"
|
||||
and m = "m" and mb = "mb" and me = "me" and ms = "ms"
|
||||
|
||||
(* the array names are from the point of view of the complex array
|
||||
(output in R2C, input in C2R) *)
|
||||
and arp = "Rp" (* real, positive *)
|
||||
and aip = "Ip" (* imag, positive *)
|
||||
and arm = "Rm" (* real, negative *)
|
||||
and aim = "Im" (* imag, negative *)
|
||||
|
||||
in
|
||||
|
||||
let sign = !Genutil.sign
|
||||
and name = !Magic.codelet_name
|
||||
and byvl x = choose_simd x (ctimes (CVar "VL", x))
|
||||
and bytwvl x = choose_simd x (ctimes (CVar "TWVL", x))
|
||||
and bytwvl_vl x = choose_simd x (ctimes (CVar "(TWVL/VL)", x)) in
|
||||
|
||||
let (bytwiddle, num_twiddles, twdesc) = Twiddle.twiddle_policy 1 true in
|
||||
let nt = num_twiddles n in
|
||||
|
||||
let byw = bytwiddle n sign (twiddle_array nt twarray) in
|
||||
|
||||
let vrs = either_stride (!urs) (C.SVar rs) in
|
||||
let sms = stride_to_string "ms" !ums in
|
||||
let msms = "-" ^ sms in
|
||||
|
||||
(* assume a single location. No point in doing alias analysis *)
|
||||
let the_location = (Unique.make (), Unique.make ()) in
|
||||
let locations _ = the_location in
|
||||
|
||||
let rlocp = (locative_array_c n
|
||||
(C.array_subscript arp vrs)
|
||||
(C.array_subscript aip vrs)
|
||||
locations sms)
|
||||
and rlocm = (locative_array_c n
|
||||
(C.array_subscript arm vrs)
|
||||
(C.array_subscript aim vrs)
|
||||
locations msms)
|
||||
and clocp = (locative_array_c n
|
||||
(C.array_subscript arp vrs)
|
||||
(C.array_subscript aip vrs)
|
||||
locations sms)
|
||||
and clocm = (locative_array_c n
|
||||
(C.array_subscript arm vrs)
|
||||
(C.array_subscript aim vrs)
|
||||
locations msms)
|
||||
in
|
||||
let rloc i = if i mod 2 == 0 then rlocp (i/2) else rlocm ((i-1)/2)
|
||||
and cloc i = if i < n - i then clocp i else clocm (n-1-i)
|
||||
and sym n f i =
|
||||
if (i < n - i) then
|
||||
f i
|
||||
else
|
||||
Complex.times (Complex.nan Expr.CONJ) (f i)
|
||||
and sym1 f i =
|
||||
if i mod 2 == 0 then
|
||||
Complex.plus [f i;
|
||||
Complex.times (Complex.nan Expr.CONJ) (f (i+1))]
|
||||
else
|
||||
Complex.times (Complex.nan Expr.I)
|
||||
(Complex.plus [Complex.uminus (f (i-1));
|
||||
Complex.times (Complex.nan Expr.CONJ) (f i)])
|
||||
and sym1i f i =
|
||||
if i mod 2 == 0 then
|
||||
Complex.plus [f i;
|
||||
Complex.times (Complex.nan Expr.I) (f (i+1))]
|
||||
else
|
||||
Complex.times (Complex.nan Expr.CONJ)
|
||||
(Complex.plus [f (i-1);
|
||||
Complex.uminus
|
||||
(Complex.times (Complex.nan Expr.I) (f i))])
|
||||
in
|
||||
|
||||
let asch =
|
||||
match !ditdif with
|
||||
| DIT ->
|
||||
let output =
|
||||
(Complex.times Complex.half) @@
|
||||
(Trig.dft_via_rdft sign n (byw (sym1 (load_array_r n rloc)))) in
|
||||
let odag = store_array_r n cloc (sym n output) in
|
||||
standard_optimizer odag
|
||||
|
||||
| DIF ->
|
||||
let output =
|
||||
byw (Trig.dft_via_rdft sign n (sym n (load_array_r n cloc)))
|
||||
in
|
||||
let odag = store_array_r n rloc (sym1i output) in
|
||||
standard_optimizer odag
|
||||
in
|
||||
|
||||
let vms = CVar sms
|
||||
and varp = CVar arp
|
||||
and vaip = CVar aip
|
||||
and varm = CVar arm
|
||||
and vaim = CVar aim
|
||||
and vm = CVar m and vmb = CVar mb and vme = CVar me
|
||||
in
|
||||
let body = Block (
|
||||
[Decl ("INT", m)],
|
||||
[For (list_to_comma
|
||||
[Expr_assign (vm, vmb);
|
||||
Expr_assign (CVar twarray,
|
||||
CPlus [CVar twarray;
|
||||
ctimes (CPlus [vmb; CUminus (Integer 1)],
|
||||
bytwvl_vl (Integer nt))])],
|
||||
Binop (" < ", vm, vme),
|
||||
list_to_comma
|
||||
[Expr_assign (vm, CPlus [vm; byvl (Integer 1)]);
|
||||
Expr_assign (varp, CPlus [varp; byvl vms]);
|
||||
Expr_assign (vaip, CPlus [vaip; byvl vms]);
|
||||
Expr_assign (varm, CPlus [varm; CUminus (byvl vms)]);
|
||||
Expr_assign (vaim, CPlus [vaim; CUminus (byvl vms)]);
|
||||
Expr_assign (CVar twarray, CPlus [CVar twarray;
|
||||
bytwvl (Integer nt)]);
|
||||
make_volatile_stride (4*n) (CVar rs)
|
||||
],
|
||||
Asch asch)]
|
||||
)
|
||||
in
|
||||
|
||||
let tree =
|
||||
Fcn ("static void", name,
|
||||
[Decl (C.realtypep, arp);
|
||||
Decl (C.realtypep, aip);
|
||||
Decl (C.realtypep, arm);
|
||||
Decl (C.realtypep, aim);
|
||||
Decl (C.constrealtypep, twarray);
|
||||
Decl (C.stridetype, rs);
|
||||
Decl ("INT", mb);
|
||||
Decl ("INT", me);
|
||||
Decl ("INT", ms)],
|
||||
finalize_fcn body)
|
||||
in
|
||||
let twinstr =
|
||||
Printf.sprintf "static const tw_instr twinstr[] = %s;\n\n"
|
||||
(twinstr_to_string "VL" (twdesc n))
|
||||
and desc =
|
||||
Printf.sprintf
|
||||
"static const hc2c_desc desc = {%d, %s, twinstr, &GENUS, %s};\n\n"
|
||||
n (stringify name) (flops_of tree)
|
||||
and register = "X(khc2c_register)"
|
||||
|
||||
in
|
||||
let init =
|
||||
"\n" ^
|
||||
twinstr ^
|
||||
desc ^
|
||||
(declare_register_fcn name) ^
|
||||
(Printf.sprintf "{\n%s(p, %s, &desc, HC2C_VIA_DFT);\n}" register name)
|
||||
in
|
||||
|
||||
(unparse tree) ^ "\n" ^ init
|
||||
|
||||
|
||||
let main () =
|
||||
begin
|
||||
Simdmagic.simd_mode := true;
|
||||
parse (speclist @ Twiddle.speclist) usage;
|
||||
print_string (generate (check_size ()));
|
||||
end
|
||||
|
||||
let _ = main()
|
||||
170
fftw-3.3.10/genfft/gen_hc2hc.ml
Normal file
170
fftw-3.3.10/genfft/gen_hc2hc.ml
Normal file
@@ -0,0 +1,170 @@
|
||||
(*
|
||||
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*)
|
||||
|
||||
open Util
|
||||
open Genutil
|
||||
open C
|
||||
|
||||
|
||||
type ditdif = DIT | DIF
|
||||
let ditdif = ref DIT
|
||||
let usage = "Usage: " ^ Sys.argv.(0) ^ " -n <number> [ -dit | -dif ]"
|
||||
|
||||
let urs = ref Stride_variable
|
||||
|
||||
let speclist = [
|
||||
"-dit",
|
||||
Arg.Unit(fun () -> ditdif := DIT),
|
||||
" generate a DIT codelet";
|
||||
|
||||
"-dif",
|
||||
Arg.Unit(fun () -> ditdif := DIF),
|
||||
" generate a DIF codelet";
|
||||
|
||||
"-with-rs",
|
||||
Arg.String(fun x -> urs := arg_to_stride x),
|
||||
" specialize for given R-stride";
|
||||
]
|
||||
|
||||
let rioarray = "cr"
|
||||
and iioarray = "ci"
|
||||
|
||||
let genone sign n transform load store vrs =
|
||||
let locations = unique_array_c n in
|
||||
let input =
|
||||
locative_array_c n
|
||||
(C.array_subscript rioarray vrs)
|
||||
(C.array_subscript iioarray vrs)
|
||||
locations "BUG" in
|
||||
let output = transform sign n (load n input) in
|
||||
let ioloc =
|
||||
locative_array_c n
|
||||
(C.array_subscript rioarray vrs)
|
||||
(C.array_subscript iioarray vrs)
|
||||
locations "BUG" in
|
||||
let odag = store n ioloc output in
|
||||
let annot = standard_optimizer odag
|
||||
in annot
|
||||
|
||||
let byi = Complex.times Complex.i
|
||||
let byui = Complex.times (Complex.uminus Complex.i)
|
||||
|
||||
let sym1 n f i =
|
||||
Complex.plus [Complex.real (f i); byi (Complex.imag (f (n - 1 - i)))]
|
||||
|
||||
let sym2 n f i = if (i < n - i) then f i else byi (f i)
|
||||
let sym2i n f i = if (i < n - i) then f i else byui (f i)
|
||||
|
||||
let generate n =
|
||||
let rs = "rs"
|
||||
and twarray = "W"
|
||||
and m = "m" and mb = "mb" and me = "me" and ms = "ms" in
|
||||
|
||||
let sign = !Genutil.sign
|
||||
and name = !Magic.codelet_name
|
||||
and byvl x = choose_simd x (ctimes (CVar "VL", x)) in
|
||||
|
||||
let (bytwiddle, num_twiddles, twdesc) = Twiddle.twiddle_policy 1 false in
|
||||
let nt = num_twiddles n in
|
||||
|
||||
let byw = bytwiddle n sign (twiddle_array nt twarray) in
|
||||
|
||||
let vrs = either_stride (!urs) (C.SVar rs) in
|
||||
|
||||
let asch =
|
||||
match !ditdif with
|
||||
| DIT ->
|
||||
genone sign n
|
||||
(fun sign n input ->
|
||||
((sym1 n) @@ (sym2 n)) (Fft.dft sign n (byw input)))
|
||||
load_array_c store_array_c vrs
|
||||
| DIF ->
|
||||
genone sign n
|
||||
(fun sign n input ->
|
||||
byw (Fft.dft sign n (((sym2i n) @@ (sym1 n)) input)))
|
||||
load_array_c store_array_c vrs
|
||||
in
|
||||
|
||||
let vms = CVar "ms"
|
||||
and vrioarray = CVar rioarray
|
||||
and viioarray = CVar iioarray
|
||||
and vm = CVar m and vmb = CVar mb and vme = CVar me
|
||||
in
|
||||
let body = Block (
|
||||
[Decl ("INT", m)],
|
||||
[For (list_to_comma
|
||||
[Expr_assign (vm, vmb);
|
||||
Expr_assign (CVar twarray,
|
||||
CPlus [CVar twarray;
|
||||
ctimes (CPlus [vmb; CUminus (Integer 1)],
|
||||
Integer nt)])],
|
||||
Binop (" < ", vm, vme),
|
||||
list_to_comma
|
||||
[Expr_assign (vm, CPlus [vm; byvl (Integer 1)]);
|
||||
Expr_assign (vrioarray, CPlus [vrioarray; byvl vms]);
|
||||
Expr_assign (viioarray,
|
||||
CPlus [viioarray; CUminus (byvl vms)]);
|
||||
Expr_assign (CVar twarray, CPlus [CVar twarray;
|
||||
byvl (Integer nt)]);
|
||||
make_volatile_stride (2*n) (CVar rs)
|
||||
],
|
||||
Asch asch)])
|
||||
in
|
||||
|
||||
let tree =
|
||||
Fcn ("static void", name,
|
||||
[Decl (C.realtypep, rioarray);
|
||||
Decl (C.realtypep, iioarray);
|
||||
Decl (C.constrealtypep, twarray);
|
||||
Decl (C.stridetype, rs);
|
||||
Decl ("INT", mb);
|
||||
Decl ("INT", me);
|
||||
Decl ("INT", ms)],
|
||||
finalize_fcn body)
|
||||
in
|
||||
let twinstr =
|
||||
Printf.sprintf "static const tw_instr twinstr[] = %s;\n\n"
|
||||
(twinstr_to_string "VL" (twdesc n))
|
||||
and desc =
|
||||
Printf.sprintf
|
||||
"static const hc2hc_desc desc = {%d, \"%s\", twinstr, &GENUS, %s};\n\n"
|
||||
n name (flops_of tree)
|
||||
and register = "X(khc2hc_register)"
|
||||
|
||||
in
|
||||
let init =
|
||||
"\n" ^
|
||||
twinstr ^
|
||||
desc ^
|
||||
(declare_register_fcn name) ^
|
||||
(Printf.sprintf "{\n%s(p, %s, &desc);\n}" register name)
|
||||
in
|
||||
|
||||
(unparse tree) ^ "\n" ^ init
|
||||
|
||||
|
||||
let main () =
|
||||
begin
|
||||
parse (speclist @ Twiddle.speclist) usage;
|
||||
print_string (generate (check_size ()));
|
||||
end
|
||||
|
||||
let _ = main()
|
||||
257
fftw-3.3.10/genfft/gen_mdct.ml
Normal file
257
fftw-3.3.10/genfft/gen_mdct.ml
Normal file
@@ -0,0 +1,257 @@
|
||||
(*
|
||||
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*)
|
||||
|
||||
(* generation of trigonometric transforms *)
|
||||
|
||||
open Util
|
||||
open Genutil
|
||||
open C
|
||||
|
||||
|
||||
let usage = "Usage: " ^ Sys.argv.(0) ^ " -n <number>"
|
||||
|
||||
let uistride = ref Stride_variable
|
||||
let uostride = ref Stride_variable
|
||||
let uivstride = ref Stride_variable
|
||||
let uovstride = ref Stride_variable
|
||||
let normalization = ref 1
|
||||
|
||||
type mode =
|
||||
| MDCT
|
||||
| MDCT_MP3
|
||||
| MDCT_VORBIS
|
||||
| MDCT_WINDOW
|
||||
| MDCT_WINDOW_SYM
|
||||
| IMDCT
|
||||
| IMDCT_MP3
|
||||
| IMDCT_VORBIS
|
||||
| IMDCT_WINDOW
|
||||
| IMDCT_WINDOW_SYM
|
||||
| NONE
|
||||
|
||||
let mode = ref NONE
|
||||
|
||||
let speclist = [
|
||||
"-with-istride",
|
||||
Arg.String(fun x -> uistride := arg_to_stride x),
|
||||
" specialize for given input stride";
|
||||
|
||||
"-with-ostride",
|
||||
Arg.String(fun x -> uostride := arg_to_stride x),
|
||||
" specialize for given output stride";
|
||||
|
||||
"-with-ivstride",
|
||||
Arg.String(fun x -> uivstride := arg_to_stride x),
|
||||
" specialize for given input vector stride";
|
||||
|
||||
"-with-ovstride",
|
||||
Arg.String(fun x -> uovstride := arg_to_stride x),
|
||||
" specialize for given output vector stride";
|
||||
|
||||
"-normalization",
|
||||
Arg.String(fun x -> normalization := int_of_string x),
|
||||
" normalization integer to divide by";
|
||||
|
||||
"-mdct",
|
||||
Arg.Unit(fun () -> mode := MDCT),
|
||||
" generate an MDCT codelet";
|
||||
|
||||
"-mdct-mp3",
|
||||
Arg.Unit(fun () -> mode := MDCT_MP3),
|
||||
" generate an MDCT codelet with MP3 windowing";
|
||||
|
||||
"-mdct-window",
|
||||
Arg.Unit(fun () -> mode := MDCT_WINDOW),
|
||||
" generate an MDCT codelet with window array";
|
||||
|
||||
"-mdct-window-sym",
|
||||
Arg.Unit(fun () -> mode := MDCT_WINDOW_SYM),
|
||||
" generate an MDCT codelet with symmetric window array";
|
||||
|
||||
"-imdct",
|
||||
Arg.Unit(fun () -> mode := IMDCT),
|
||||
" generate an IMDCT codelet";
|
||||
|
||||
"-imdct-mp3",
|
||||
Arg.Unit(fun () -> mode := IMDCT_MP3),
|
||||
" generate an IMDCT codelet with MP3 windowing";
|
||||
|
||||
"-imdct-window",
|
||||
Arg.Unit(fun () -> mode := IMDCT_WINDOW),
|
||||
" generate an IMDCT codelet with window array";
|
||||
|
||||
"-imdct-window-sym",
|
||||
Arg.Unit(fun () -> mode := IMDCT_WINDOW_SYM),
|
||||
" generate an IMDCT codelet with symmetric window array";
|
||||
]
|
||||
|
||||
let unity_window n i = Complex.one
|
||||
|
||||
(* MP3 window(k) = sin(pi/(2n) * (k + 1/2)) *)
|
||||
let mp3_window n k =
|
||||
Complex.imag (Complex.exp (8 * n) (2*k + 1))
|
||||
|
||||
(* Vorbis window(k) = sin(pi/2 * (mp3_window(k))^2)
|
||||
... this is transcendental, though, so we can't do it with our
|
||||
current Complex.exp function *)
|
||||
|
||||
let window_array n w =
|
||||
array n (fun i ->
|
||||
let stride = C.SInteger 1
|
||||
and klass = Unique.make () in
|
||||
let refr = C.array_subscript w stride i in
|
||||
let kr = Variable.make_constant klass refr in
|
||||
load_r (kr, kr))
|
||||
|
||||
let load_window w n i = w i
|
||||
let load_window_sym w n i = w (if (i < n) then i else (2*n - 1 - i))
|
||||
|
||||
(* fixme: use same locations for input and output so that it works in-place? *)
|
||||
|
||||
(* Note: only correct for even n! *)
|
||||
let load_array_mdct window n rarr iarr locations =
|
||||
let twon = 2 * n in
|
||||
let arr = load_array_c twon
|
||||
(locative_array_c twon rarr iarr locations "BUG") in
|
||||
let arrw = fun i -> Complex.times (window n i) (arr i) in
|
||||
array n
|
||||
((Complex.times Complex.half) @@
|
||||
(fun i ->
|
||||
if (i < n/2) then
|
||||
Complex.uminus (Complex.plus [arrw (i + n + n/2);
|
||||
arrw (n + n/2 - 1 - i)])
|
||||
else
|
||||
Complex.plus [arrw (i - n/2);
|
||||
Complex.uminus (arrw (n + n/2 - 1 - i))]))
|
||||
|
||||
let store_array_mdct window n rarr iarr locations arr =
|
||||
store_array_r n (locative_array_c n rarr iarr locations "BUG") arr
|
||||
|
||||
let load_array_imdct window n rarr iarr locations =
|
||||
load_array_c n (locative_array_c n rarr iarr locations "BUG")
|
||||
|
||||
let store_array_imdct window n rarr iarr locations arr =
|
||||
let n2 = n/2 in
|
||||
let threen2 = 3*n2 in
|
||||
let arr2 = fun i ->
|
||||
if (i < n2) then
|
||||
arr (i + n2)
|
||||
else if (i < threen2) then
|
||||
Complex.uminus (arr (threen2 - 1 - i))
|
||||
else
|
||||
Complex.uminus (arr (i - threen2))
|
||||
in
|
||||
let arr2w = fun i -> Complex.times (window n i) (arr2 i) in
|
||||
let twon = 2 * n in
|
||||
store_array_r twon (locative_array_c twon rarr iarr locations "BUG") arr2w
|
||||
|
||||
let window_param = function
|
||||
MDCT_WINDOW -> true
|
||||
| MDCT_WINDOW_SYM -> true
|
||||
| IMDCT_WINDOW -> true
|
||||
| IMDCT_WINDOW_SYM -> true
|
||||
| _ -> false
|
||||
|
||||
let generate n mode =
|
||||
let iarray = "I"
|
||||
and oarray = "O"
|
||||
and istride = "istride"
|
||||
and ostride = "ostride"
|
||||
and window = "W"
|
||||
and name = !Magic.codelet_name in
|
||||
|
||||
let vistride = either_stride (!uistride) (C.SVar istride)
|
||||
and vostride = either_stride (!uostride) (C.SVar ostride)
|
||||
in
|
||||
|
||||
let sivs = stride_to_string "ovs" !uovstride in
|
||||
let sovs = stride_to_string "ivs" !uivstride in
|
||||
|
||||
let (transform, load_input, store_output) = match mode with
|
||||
| MDCT -> Trig.dctIV, load_array_mdct unity_window,
|
||||
store_array_mdct unity_window
|
||||
| MDCT_MP3 -> Trig.dctIV, load_array_mdct mp3_window,
|
||||
store_array_mdct unity_window
|
||||
| MDCT_WINDOW -> Trig.dctIV, load_array_mdct
|
||||
(load_window (window_array (2 * n) window)),
|
||||
store_array_mdct unity_window
|
||||
| MDCT_WINDOW_SYM -> Trig.dctIV, load_array_mdct
|
||||
(load_window_sym (window_array n window)),
|
||||
store_array_mdct unity_window
|
||||
| IMDCT -> Trig.dctIV, load_array_imdct unity_window,
|
||||
store_array_imdct unity_window
|
||||
| IMDCT_MP3 -> Trig.dctIV, load_array_imdct unity_window,
|
||||
store_array_imdct mp3_window
|
||||
| IMDCT_WINDOW -> Trig.dctIV, load_array_imdct unity_window,
|
||||
store_array_imdct (load_window (window_array (2 * n) window))
|
||||
| IMDCT_WINDOW_SYM -> Trig.dctIV, load_array_imdct unity_window,
|
||||
store_array_imdct (load_window_sym (window_array n window))
|
||||
| _ -> failwith "must specify transform kind"
|
||||
in
|
||||
|
||||
let locations = unique_array_c (2*n) in
|
||||
let input =
|
||||
load_input n
|
||||
(C.array_subscript iarray vistride)
|
||||
(C.array_subscript "BUG" vistride)
|
||||
locations
|
||||
in
|
||||
let output = (Complex.times (Complex.inverse_int !normalization))
|
||||
@@ (transform n input) in
|
||||
let odag =
|
||||
store_output n
|
||||
(C.array_subscript oarray vostride)
|
||||
(C.array_subscript "BUG" vostride)
|
||||
locations
|
||||
output
|
||||
in
|
||||
let annot = standard_optimizer odag in
|
||||
|
||||
let tree =
|
||||
Fcn ("void", name,
|
||||
([Decl (C.constrealtypep, iarray);
|
||||
Decl (C.realtypep, oarray)]
|
||||
@ (if stride_fixed !uistride then []
|
||||
else [Decl (C.stridetype, istride)])
|
||||
@ (if stride_fixed !uostride then []
|
||||
else [Decl (C.stridetype, ostride)])
|
||||
@ (choose_simd []
|
||||
(if stride_fixed !uivstride then [] else
|
||||
[Decl ("int", sivs)]))
|
||||
@ (choose_simd []
|
||||
(if stride_fixed !uovstride then [] else
|
||||
[Decl ("int", sovs)]))
|
||||
@ (if (not (window_param mode)) then []
|
||||
else [Decl (C.constrealtypep, window)])
|
||||
),
|
||||
finalize_fcn (Asch annot))
|
||||
|
||||
in
|
||||
(unparse tree) ^ "\n"
|
||||
|
||||
|
||||
let main () =
|
||||
begin
|
||||
parse speclist usage;
|
||||
print_string (generate (check_size ()) !mode);
|
||||
end
|
||||
|
||||
let _ = main()
|
||||
168
fftw-3.3.10/genfft/gen_notw.ml
Normal file
168
fftw-3.3.10/genfft/gen_notw.ml
Normal file
@@ -0,0 +1,168 @@
|
||||
(*
|
||||
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*)
|
||||
|
||||
open Util
|
||||
open Genutil
|
||||
open C
|
||||
|
||||
|
||||
let usage = "Usage: " ^ Sys.argv.(0) ^ " -n <number>"
|
||||
|
||||
let uistride = ref Stride_variable
|
||||
let uostride = ref Stride_variable
|
||||
let uivstride = ref Stride_variable
|
||||
let uovstride = ref Stride_variable
|
||||
|
||||
let speclist = [
|
||||
"-with-istride",
|
||||
Arg.String(fun x -> uistride := arg_to_stride x),
|
||||
" specialize for given input stride";
|
||||
|
||||
"-with-ostride",
|
||||
Arg.String(fun x -> uostride := arg_to_stride x),
|
||||
" specialize for given output stride";
|
||||
|
||||
"-with-ivstride",
|
||||
Arg.String(fun x -> uivstride := arg_to_stride x),
|
||||
" specialize for given input vector stride";
|
||||
|
||||
"-with-ovstride",
|
||||
Arg.String(fun x -> uovstride := arg_to_stride x),
|
||||
" specialize for given output vector stride"
|
||||
]
|
||||
|
||||
let nonstandard_optimizer list_of_buddy_stores dag =
|
||||
let sched = standard_scheduler dag in
|
||||
let annot = Annotate.annotate list_of_buddy_stores sched in
|
||||
let _ = dump_asched annot in
|
||||
annot
|
||||
|
||||
let generate n =
|
||||
let riarray = "ri"
|
||||
and iiarray = "ii"
|
||||
and roarray = "ro"
|
||||
and ioarray = "io"
|
||||
and istride = "is"
|
||||
and ostride = "os"
|
||||
and i = "i"
|
||||
and v = "v"
|
||||
in
|
||||
|
||||
let sign = !Genutil.sign
|
||||
and name = !Magic.codelet_name
|
||||
and byvl x = choose_simd x (ctimes (CVar "(2 * VL)", x)) in
|
||||
let ename = expand_name name in
|
||||
|
||||
let vistride = either_stride (!uistride) (C.SVar istride)
|
||||
and vostride = either_stride (!uostride) (C.SVar ostride)
|
||||
in
|
||||
|
||||
let sovs = stride_to_string "ovs" !uovstride in
|
||||
let sivs = stride_to_string "ivs" !uivstride in
|
||||
|
||||
let locations = unique_array_c n in
|
||||
let input =
|
||||
locative_array_c n
|
||||
(C.array_subscript riarray vistride)
|
||||
(C.array_subscript iiarray vistride)
|
||||
locations sivs in
|
||||
let output = Fft.dft sign n (load_array_c n input) in
|
||||
let oloc =
|
||||
locative_array_c n
|
||||
(C.array_subscript roarray vostride)
|
||||
(C.array_subscript ioarray vostride)
|
||||
locations sovs in
|
||||
let list_of_buddy_stores =
|
||||
let k = !Simdmagic.store_multiple in
|
||||
if (k > 1) then
|
||||
if (n mod k == 0) then
|
||||
List.append
|
||||
(List.map
|
||||
(fun i -> List.map (fun j -> (fst (oloc (k * i + j)))) (iota k))
|
||||
(iota (n / k)))
|
||||
(List.map
|
||||
(fun i -> List.map (fun j -> (snd (oloc (k * i + j)))) (iota k))
|
||||
(iota (n / k)))
|
||||
else failwith "invalid n for -store-multiple"
|
||||
else []
|
||||
in
|
||||
|
||||
let odag = store_array_c n oloc output in
|
||||
let annot = nonstandard_optimizer list_of_buddy_stores odag in
|
||||
|
||||
let body = Block (
|
||||
[Decl ("INT", i)],
|
||||
[For (Expr_assign (CVar i, CVar v),
|
||||
Binop (" > ", CVar i, Integer 0),
|
||||
list_to_comma
|
||||
[Expr_assign (CVar i, CPlus [CVar i; CUminus (byvl (Integer 1))]);
|
||||
Expr_assign (CVar riarray, CPlus [CVar riarray;
|
||||
byvl (CVar sivs)]);
|
||||
Expr_assign (CVar iiarray, CPlus [CVar iiarray;
|
||||
byvl (CVar sivs)]);
|
||||
Expr_assign (CVar roarray, CPlus [CVar roarray;
|
||||
byvl (CVar sovs)]);
|
||||
Expr_assign (CVar ioarray, CPlus [CVar ioarray;
|
||||
byvl (CVar sovs)]);
|
||||
make_volatile_stride (4*n) (CVar istride);
|
||||
make_volatile_stride (4*n) (CVar ostride)
|
||||
],
|
||||
Asch annot)
|
||||
])
|
||||
in
|
||||
|
||||
let tree =
|
||||
Fcn ((if !Magic.standalone then "void" else "static void"), ename,
|
||||
([Decl (C.constrealtypep, riarray);
|
||||
Decl (C.constrealtypep, iiarray);
|
||||
Decl (C.realtypep, roarray);
|
||||
Decl (C.realtypep, ioarray);
|
||||
Decl (C.stridetype, istride);
|
||||
Decl (C.stridetype, ostride);
|
||||
Decl ("INT", v);
|
||||
Decl ("INT", "ivs");
|
||||
Decl ("INT", "ovs")]),
|
||||
finalize_fcn body)
|
||||
|
||||
in let desc =
|
||||
Printf.sprintf
|
||||
"static const kdft_desc desc = { %d, %s, %s, &GENUS, %s, %s, %s, %s };\n"
|
||||
n (stringify name) (flops_of tree)
|
||||
(stride_to_solverparm !uistride) (stride_to_solverparm !uostride)
|
||||
(choose_simd "0" (stride_to_solverparm !uivstride))
|
||||
(choose_simd "0" (stride_to_solverparm !uovstride))
|
||||
|
||||
and init =
|
||||
(declare_register_fcn name) ^
|
||||
"{" ^
|
||||
" X(kdft_register)(p, " ^ ename ^ ", &desc);\n" ^
|
||||
"}\n"
|
||||
|
||||
in ((unparse tree) ^ "\n" ^
|
||||
(if !Magic.standalone then "" else desc ^ init))
|
||||
|
||||
let main () =
|
||||
begin
|
||||
parse speclist usage;
|
||||
print_string (generate (check_size ()));
|
||||
end
|
||||
|
||||
let _ = main()
|
||||
165
fftw-3.3.10/genfft/gen_notw_c.ml
Normal file
165
fftw-3.3.10/genfft/gen_notw_c.ml
Normal file
@@ -0,0 +1,165 @@
|
||||
(*
|
||||
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*)
|
||||
|
||||
open Util
|
||||
open Genutil
|
||||
open C
|
||||
|
||||
|
||||
let usage = "Usage: " ^ Sys.argv.(0) ^ " -n <number>"
|
||||
|
||||
let uistride = ref Stride_variable
|
||||
let uostride = ref Stride_variable
|
||||
let uivstride = ref Stride_variable
|
||||
let uovstride = ref Stride_variable
|
||||
|
||||
let speclist = [
|
||||
"-with-istride",
|
||||
Arg.String(fun x -> uistride := arg_to_stride x),
|
||||
" specialize for given input stride";
|
||||
|
||||
"-with-ostride",
|
||||
Arg.String(fun x -> uostride := arg_to_stride x),
|
||||
" specialize for given output stride";
|
||||
|
||||
"-with-ivstride",
|
||||
Arg.String(fun x -> uivstride := arg_to_stride x),
|
||||
" specialize for given input vector stride";
|
||||
|
||||
"-with-ovstride",
|
||||
Arg.String(fun x -> uovstride := arg_to_stride x),
|
||||
" specialize for given output vector stride"
|
||||
]
|
||||
|
||||
let nonstandard_optimizer list_of_buddy_stores dag =
|
||||
let sched = standard_scheduler dag in
|
||||
let annot = Annotate.annotate list_of_buddy_stores sched in
|
||||
let _ = dump_asched annot in
|
||||
annot
|
||||
|
||||
let generate n =
|
||||
let riarray = "xi"
|
||||
and roarray = "xo"
|
||||
and istride = "is"
|
||||
and ostride = "os"
|
||||
and i = "i"
|
||||
and v = "v"
|
||||
in
|
||||
|
||||
let sign = !Genutil.sign
|
||||
and name = !Magic.codelet_name
|
||||
and byvl x = choose_simd x (ctimes (CVar "VL", x)) in
|
||||
let ename = expand_name name in
|
||||
|
||||
let vistride = either_stride (!uistride) (C.SVar istride)
|
||||
and vostride = either_stride (!uostride) (C.SVar ostride)
|
||||
in
|
||||
|
||||
let sivs = stride_to_string "ivs" !uivstride in
|
||||
let sovs = stride_to_string "ovs" !uovstride in
|
||||
|
||||
let fft = Trig.dft_via_rdft in
|
||||
|
||||
let locations = unique_array_c n in
|
||||
let input =
|
||||
locative_array_c n
|
||||
(C.array_subscript riarray vistride)
|
||||
(C.array_subscript "BUG" vistride)
|
||||
locations sivs in
|
||||
let output = fft sign n (load_array_r n input) in
|
||||
let oloc =
|
||||
locative_array_c n
|
||||
(C.array_subscript roarray vostride)
|
||||
(C.array_subscript "BUG" vostride)
|
||||
locations sovs in
|
||||
let list_of_buddy_stores =
|
||||
let k = !Simdmagic.store_multiple in
|
||||
if (k > 1) then
|
||||
if (n mod k == 0) then
|
||||
List.map
|
||||
(fun i -> List.map (fun j -> (fst (oloc (k * i + j)))) (iota k))
|
||||
(iota (n / k))
|
||||
else failwith "invalid n for -store-multiple"
|
||||
else []
|
||||
in
|
||||
let odag = store_array_r n oloc output in
|
||||
let annot = nonstandard_optimizer list_of_buddy_stores odag in
|
||||
|
||||
let body = Block (
|
||||
[Decl ("INT", i);
|
||||
Decl (C.constrealtypep, riarray);
|
||||
Decl (C.realtypep, roarray)],
|
||||
[Stmt_assign (CVar riarray, CVar (if (sign < 0) then "ri" else "ii"));
|
||||
Stmt_assign (CVar roarray, CVar (if (sign < 0) then "ro" else "io"));
|
||||
For (Expr_assign (CVar i, CVar v),
|
||||
Binop (" > ", CVar i, Integer 0),
|
||||
list_to_comma
|
||||
[Expr_assign (CVar i, CPlus [CVar i; CUminus (byvl (Integer 1))]);
|
||||
Expr_assign (CVar riarray, CPlus [CVar riarray;
|
||||
byvl (CVar sivs)]);
|
||||
Expr_assign (CVar roarray, CPlus [CVar roarray;
|
||||
byvl (CVar sovs)]);
|
||||
make_volatile_stride (2*n) (CVar istride);
|
||||
make_volatile_stride (2*n) (CVar ostride)
|
||||
],
|
||||
Asch annot);
|
||||
])
|
||||
in
|
||||
|
||||
let tree =
|
||||
Fcn ((if !Magic.standalone then "void" else "static void"), ename,
|
||||
([Decl (C.constrealtypep, "ri");
|
||||
Decl (C.constrealtypep, "ii");
|
||||
Decl (C.realtypep, "ro");
|
||||
Decl (C.realtypep, "io");
|
||||
Decl (C.stridetype, istride);
|
||||
Decl (C.stridetype, ostride);
|
||||
Decl ("INT", v);
|
||||
Decl ("INT", "ivs");
|
||||
Decl ("INT", "ovs")]),
|
||||
finalize_fcn body)
|
||||
|
||||
in
|
||||
let desc =
|
||||
Printf.sprintf
|
||||
"static const kdft_desc desc = { %d, %s, %s, &GENUS, %s, %s, %s, %s };\n"
|
||||
n (stringify name) (flops_of tree)
|
||||
(stride_to_solverparm !uistride) (stride_to_solverparm !uostride)
|
||||
(choose_simd "0" (stride_to_solverparm !uivstride))
|
||||
(choose_simd "0" (stride_to_solverparm !uovstride))
|
||||
|
||||
and init =
|
||||
(declare_register_fcn name) ^
|
||||
"{" ^
|
||||
" X(kdft_register)(p, " ^ ename ^ ", &desc);\n" ^
|
||||
"}\n"
|
||||
|
||||
in ((unparse tree) ^ "\n" ^
|
||||
(if !Magic.standalone then "" else desc ^ init))
|
||||
|
||||
let main () =
|
||||
begin
|
||||
Simdmagic.simd_mode := true;
|
||||
parse speclist usage;
|
||||
print_string (generate (check_size ()));
|
||||
end
|
||||
|
||||
let _ = main()
|
||||
167
fftw-3.3.10/genfft/gen_r2cb.ml
Normal file
167
fftw-3.3.10/genfft/gen_r2cb.ml
Normal file
@@ -0,0 +1,167 @@
|
||||
(*
|
||||
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*)
|
||||
|
||||
open Util
|
||||
open Genutil
|
||||
open C
|
||||
|
||||
|
||||
let usage = "Usage: " ^ Sys.argv.(0) ^ " -n <number>"
|
||||
|
||||
let urs = ref Stride_variable
|
||||
let ucsr = ref Stride_variable
|
||||
let ucsi = ref Stride_variable
|
||||
let uivs = ref Stride_variable
|
||||
let uovs = ref Stride_variable
|
||||
let dftIII_flag = ref false
|
||||
|
||||
let speclist = [
|
||||
"-with-rs",
|
||||
Arg.String(fun x -> urs := arg_to_stride x),
|
||||
" specialize for given real-array stride";
|
||||
|
||||
"-with-csr",
|
||||
Arg.String(fun x -> ucsr := arg_to_stride x),
|
||||
" specialize for given complex-array real stride";
|
||||
|
||||
"-with-csi",
|
||||
Arg.String(fun x -> ucsi := arg_to_stride x),
|
||||
" specialize for given complex-array imaginary stride";
|
||||
|
||||
"-with-ivs",
|
||||
Arg.String(fun x -> uivs := arg_to_stride x),
|
||||
" specialize for given input vector stride";
|
||||
|
||||
"-with-ovs",
|
||||
Arg.String(fun x -> uovs := arg_to_stride x),
|
||||
" specialize for given output vector stride";
|
||||
|
||||
"-dft-III",
|
||||
Arg.Unit(fun () -> dftIII_flag := true),
|
||||
" produce shifted dftIII-style codelets"
|
||||
]
|
||||
|
||||
let hcdftIII sign n input =
|
||||
let input' i =
|
||||
if (i mod 2 == 0) then
|
||||
Complex.zero
|
||||
else
|
||||
let i' = (i - 1) / 2 in
|
||||
if (2 * i' < n - 1) then (input i')
|
||||
else if (2 * i' == n - 1) then
|
||||
Complex.real (input i')
|
||||
else
|
||||
Complex.conj (input (n - 1 - i'))
|
||||
in Fft.dft sign (2 * n) input'
|
||||
|
||||
let generate n =
|
||||
let ar0 = "R0" and ar1 = "R1" and acr = "Cr" and aci = "Ci"
|
||||
and rs = "rs" and csr = "csr" and csi = "csi"
|
||||
and i = "i" and v = "v"
|
||||
and transform = if !dftIII_flag then hcdftIII else Trig.hdft
|
||||
in
|
||||
|
||||
let sign = !Genutil.sign
|
||||
and name = !Magic.codelet_name in
|
||||
|
||||
let vrs = either_stride (!urs) (C.SVar rs)
|
||||
and vcsr = either_stride (!ucsr) (C.SVar csr)
|
||||
and vcsi = either_stride (!ucsi) (C.SVar csi)
|
||||
in
|
||||
|
||||
let sovs = stride_to_string "ovs" !uovs in
|
||||
let sivs = stride_to_string "ivs" !uivs in
|
||||
|
||||
let locations = unique_array_c n in
|
||||
let input =
|
||||
locative_array_c n
|
||||
(C.array_subscript acr vcsr)
|
||||
(C.array_subscript aci vcsi)
|
||||
locations sivs in
|
||||
let output = transform sign n (load_array_hc n input) in
|
||||
let oloce =
|
||||
locative_array_c n
|
||||
(C.array_subscript ar0 vrs)
|
||||
(C.array_subscript "BUG" vrs)
|
||||
locations sovs
|
||||
and oloco =
|
||||
locative_array_c n
|
||||
(C.array_subscript ar1 vrs)
|
||||
(C.array_subscript "BUG" vrs)
|
||||
locations sovs in
|
||||
let oloc i = if i mod 2 == 0 then oloce (i/2) else oloco ((i-1)/2) in
|
||||
let odag = store_array_r n oloc output in
|
||||
let annot = standard_optimizer odag in
|
||||
|
||||
let body = Block (
|
||||
[Decl ("INT", i)],
|
||||
[For (Expr_assign (CVar i, CVar v),
|
||||
Binop (" > ", CVar i, Integer 0),
|
||||
list_to_comma
|
||||
[Expr_assign (CVar i, CPlus [CVar i; CUminus (Integer 1)]);
|
||||
Expr_assign (CVar ar0, CPlus [CVar ar0; CVar sovs]);
|
||||
Expr_assign (CVar ar1, CPlus [CVar ar1; CVar sovs]);
|
||||
Expr_assign (CVar acr, CPlus [CVar acr; CVar sivs]);
|
||||
Expr_assign (CVar aci, CPlus [CVar aci; CVar sivs]);
|
||||
make_volatile_stride (4*n) (CVar rs);
|
||||
make_volatile_stride (4*n) (CVar csr);
|
||||
make_volatile_stride (4*n) (CVar csi)
|
||||
],
|
||||
Asch annot)
|
||||
])
|
||||
in
|
||||
|
||||
let tree =
|
||||
Fcn ((if !Magic.standalone then "void" else "static void"), name,
|
||||
([Decl (C.realtypep, ar0);
|
||||
Decl (C.realtypep, ar1);
|
||||
Decl (C.realtypep, acr);
|
||||
Decl (C.realtypep, aci);
|
||||
Decl (C.stridetype, rs);
|
||||
Decl (C.stridetype, csr);
|
||||
Decl (C.stridetype, csi);
|
||||
Decl ("INT", v);
|
||||
Decl ("INT", "ivs");
|
||||
Decl ("INT", "ovs")]),
|
||||
finalize_fcn body)
|
||||
|
||||
in let desc =
|
||||
Printf.sprintf
|
||||
"static const kr2c_desc desc = { %d, \"%s\", %s, &GENUS };\n\n"
|
||||
n name (flops_of tree)
|
||||
|
||||
and init =
|
||||
(declare_register_fcn name) ^
|
||||
"{" ^
|
||||
" X(kr2c_register)(p, " ^ name ^ ", &desc);\n" ^
|
||||
"}\n"
|
||||
|
||||
in
|
||||
(unparse tree) ^ "\n" ^ (if !Magic.standalone then "" else desc ^ init)
|
||||
|
||||
|
||||
let main () =
|
||||
begin
|
||||
parse speclist usage;
|
||||
print_string (generate (check_size ()));
|
||||
end
|
||||
|
||||
let _ = main()
|
||||
164
fftw-3.3.10/genfft/gen_r2cf.ml
Normal file
164
fftw-3.3.10/genfft/gen_r2cf.ml
Normal file
@@ -0,0 +1,164 @@
|
||||
(*
|
||||
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*)
|
||||
|
||||
open Util
|
||||
open Genutil
|
||||
open C
|
||||
|
||||
|
||||
let usage = "Usage: " ^ Sys.argv.(0) ^ " -n <number>"
|
||||
|
||||
let urs = ref Stride_variable
|
||||
let ucsr = ref Stride_variable
|
||||
let ucsi = ref Stride_variable
|
||||
let uivs = ref Stride_variable
|
||||
let uovs = ref Stride_variable
|
||||
let dftII_flag = ref false
|
||||
|
||||
let speclist = [
|
||||
"-with-rs",
|
||||
Arg.String(fun x -> urs := arg_to_stride x),
|
||||
" specialize for given real-array stride";
|
||||
|
||||
"-with-csr",
|
||||
Arg.String(fun x -> ucsr := arg_to_stride x),
|
||||
" specialize for given complex-array real stride";
|
||||
|
||||
"-with-csi",
|
||||
Arg.String(fun x -> ucsi := arg_to_stride x),
|
||||
" specialize for given complex-array imaginary stride";
|
||||
|
||||
"-with-ivs",
|
||||
Arg.String(fun x -> uivs := arg_to_stride x),
|
||||
" specialize for given input vector stride";
|
||||
|
||||
"-with-ovs",
|
||||
Arg.String(fun x -> uovs := arg_to_stride x),
|
||||
" specialize for given output vector stride";
|
||||
|
||||
"-dft-II",
|
||||
Arg.Unit(fun () -> dftII_flag := true),
|
||||
" produce shifted dftII-style codelets"
|
||||
]
|
||||
|
||||
let rdftII sign n input =
|
||||
let input' i = if i < n then input i else Complex.zero in
|
||||
let f = Fft.dft sign (2 * n) input' in
|
||||
let g i = f (2 * i + 1)
|
||||
in fun i ->
|
||||
if (i < n - i) then g i
|
||||
else if (2 * i + 1 == n) then Complex.real (g i)
|
||||
else Complex.zero
|
||||
|
||||
let generate n =
|
||||
let ar0 = "R0" and ar1 = "R1" and acr = "Cr" and aci = "Ci"
|
||||
and rs = "rs" and csr = "csr" and csi = "csi"
|
||||
and i = "i" and v = "v"
|
||||
and transform = if !dftII_flag then rdftII else Trig.rdft
|
||||
in
|
||||
|
||||
let sign = !Genutil.sign
|
||||
and name = !Magic.codelet_name in
|
||||
|
||||
let vrs = either_stride (!urs) (C.SVar rs)
|
||||
and vcsr = either_stride (!ucsr) (C.SVar csr)
|
||||
and vcsi = either_stride (!ucsi) (C.SVar csi)
|
||||
in
|
||||
|
||||
let sovs = stride_to_string "ovs" !uovs in
|
||||
let sivs = stride_to_string "ivs" !uivs in
|
||||
|
||||
let locations = unique_array_c n in
|
||||
let inpute =
|
||||
locative_array_c n
|
||||
(C.array_subscript ar0 vrs)
|
||||
(C.array_subscript "BUG" vrs)
|
||||
locations sivs
|
||||
and inputo =
|
||||
locative_array_c n
|
||||
(C.array_subscript ar1 vrs)
|
||||
(C.array_subscript "BUG" vrs)
|
||||
locations sivs
|
||||
in
|
||||
let input i = if i mod 2 == 0 then inpute (i/2) else inputo ((i-1)/2) in
|
||||
let output = transform sign n (load_array_r n input) in
|
||||
let oloc =
|
||||
locative_array_c n
|
||||
(C.array_subscript acr vcsr)
|
||||
(C.array_subscript aci vcsi)
|
||||
locations sovs in
|
||||
let odag = store_array_hc n oloc output in
|
||||
let annot = standard_optimizer odag in
|
||||
|
||||
let body = Block (
|
||||
[Decl ("INT", i)],
|
||||
[For (Expr_assign (CVar i, CVar v),
|
||||
Binop (" > ", CVar i, Integer 0),
|
||||
list_to_comma
|
||||
[Expr_assign (CVar i, CPlus [CVar i; CUminus (Integer 1)]);
|
||||
Expr_assign (CVar ar0, CPlus [CVar ar0; CVar sivs]);
|
||||
Expr_assign (CVar ar1, CPlus [CVar ar1; CVar sivs]);
|
||||
Expr_assign (CVar acr, CPlus [CVar acr; CVar sovs]);
|
||||
Expr_assign (CVar aci, CPlus [CVar aci; CVar sovs]);
|
||||
make_volatile_stride (4*n) (CVar rs);
|
||||
make_volatile_stride (4*n) (CVar csr);
|
||||
make_volatile_stride (4*n) (CVar csi)
|
||||
],
|
||||
Asch annot)
|
||||
])
|
||||
in
|
||||
|
||||
let tree =
|
||||
Fcn ((if !Magic.standalone then "void" else "static void"), name,
|
||||
([Decl (C.realtypep, ar0);
|
||||
Decl (C.realtypep, ar1);
|
||||
Decl (C.realtypep, acr);
|
||||
Decl (C.realtypep, aci);
|
||||
Decl (C.stridetype, rs);
|
||||
Decl (C.stridetype, csr);
|
||||
Decl (C.stridetype, csi);
|
||||
Decl ("INT", v);
|
||||
Decl ("INT", "ivs");
|
||||
Decl ("INT", "ovs")]),
|
||||
finalize_fcn body)
|
||||
|
||||
in let desc =
|
||||
Printf.sprintf
|
||||
"static const kr2c_desc desc = { %d, \"%s\", %s, &GENUS };\n\n"
|
||||
n name (flops_of tree)
|
||||
|
||||
and init =
|
||||
(declare_register_fcn name) ^
|
||||
"{" ^
|
||||
" X(kr2c_register)(p, " ^ name ^ ", &desc);\n" ^
|
||||
"}\n"
|
||||
|
||||
in
|
||||
(unparse tree) ^ "\n" ^ (if !Magic.standalone then "" else desc ^ init)
|
||||
|
||||
|
||||
let main () =
|
||||
begin
|
||||
parse speclist usage;
|
||||
print_string (generate (check_size ()));
|
||||
end
|
||||
|
||||
let _ = main()
|
||||
257
fftw-3.3.10/genfft/gen_r2r.ml
Normal file
257
fftw-3.3.10/genfft/gen_r2r.ml
Normal file
@@ -0,0 +1,257 @@
|
||||
(*
|
||||
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*)
|
||||
|
||||
(* generation of trigonometric transforms *)
|
||||
|
||||
open Util
|
||||
open Genutil
|
||||
open C
|
||||
|
||||
|
||||
let usage = "Usage: " ^ Sys.argv.(0) ^ " -n <number>"
|
||||
|
||||
let uistride = ref Stride_variable
|
||||
let uostride = ref Stride_variable
|
||||
let uivstride = ref Stride_variable
|
||||
let uovstride = ref Stride_variable
|
||||
|
||||
type mode =
|
||||
| RDFT
|
||||
| HDFT
|
||||
| DHT
|
||||
| REDFT00
|
||||
| REDFT10
|
||||
| REDFT01
|
||||
| REDFT11
|
||||
| RODFT00
|
||||
| RODFT10
|
||||
| RODFT01
|
||||
| RODFT11
|
||||
| NONE
|
||||
|
||||
let mode = ref NONE
|
||||
let normsqr = ref 1
|
||||
let unitary = ref false
|
||||
let noloop = ref false
|
||||
|
||||
let speclist = [
|
||||
"-with-istride",
|
||||
Arg.String(fun x -> uistride := arg_to_stride x),
|
||||
" specialize for given input stride";
|
||||
|
||||
"-with-ostride",
|
||||
Arg.String(fun x -> uostride := arg_to_stride x),
|
||||
" specialize for given output stride";
|
||||
|
||||
"-with-ivstride",
|
||||
Arg.String(fun x -> uivstride := arg_to_stride x),
|
||||
" specialize for given input vector stride";
|
||||
|
||||
"-with-ovstride",
|
||||
Arg.String(fun x -> uovstride := arg_to_stride x),
|
||||
" specialize for given output vector stride";
|
||||
|
||||
"-rdft",
|
||||
Arg.Unit(fun () -> mode := RDFT),
|
||||
" generate a real DFT codelet";
|
||||
|
||||
"-hdft",
|
||||
Arg.Unit(fun () -> mode := HDFT),
|
||||
" generate a Hermitian DFT codelet";
|
||||
|
||||
"-dht",
|
||||
Arg.Unit(fun () -> mode := DHT),
|
||||
" generate a DHT codelet";
|
||||
|
||||
"-redft00",
|
||||
Arg.Unit(fun () -> mode := REDFT00),
|
||||
" generate a DCT-I codelet";
|
||||
|
||||
"-redft10",
|
||||
Arg.Unit(fun () -> mode := REDFT10),
|
||||
" generate a DCT-II codelet";
|
||||
|
||||
"-redft01",
|
||||
Arg.Unit(fun () -> mode := REDFT01),
|
||||
" generate a DCT-III codelet";
|
||||
|
||||
"-redft11",
|
||||
Arg.Unit(fun () -> mode := REDFT11),
|
||||
" generate a DCT-IV codelet";
|
||||
|
||||
"-rodft00",
|
||||
Arg.Unit(fun () -> mode := RODFT00),
|
||||
" generate a DST-I codelet";
|
||||
|
||||
"-rodft10",
|
||||
Arg.Unit(fun () -> mode := RODFT10),
|
||||
" generate a DST-II codelet";
|
||||
|
||||
"-rodft01",
|
||||
Arg.Unit(fun () -> mode := RODFT01),
|
||||
" generate a DST-III codelet";
|
||||
|
||||
"-rodft11",
|
||||
Arg.Unit(fun () -> mode := RODFT11),
|
||||
" generate a DST-IV codelet";
|
||||
|
||||
"-normalization",
|
||||
Arg.String(fun x -> let ix = int_of_string x in normsqr := ix * ix),
|
||||
" normalization integer to divide by";
|
||||
|
||||
"-normsqr",
|
||||
Arg.String(fun x -> normsqr := int_of_string x),
|
||||
" integer square of normalization to divide by";
|
||||
|
||||
"-unitary",
|
||||
Arg.Unit(fun () -> unitary := true),
|
||||
" unitary normalization (up overall scale factor)";
|
||||
|
||||
"-noloop",
|
||||
Arg.Unit(fun () -> noloop := true),
|
||||
" no vector loop";
|
||||
]
|
||||
|
||||
let sqrt_half = Complex.inverse_int_sqrt 2
|
||||
let sqrt_two = Complex.int_sqrt 2
|
||||
|
||||
let rescale sc s1 s2 input i =
|
||||
if ((i == s1 || i == s2) && !unitary) then
|
||||
Complex.times (input i) sc
|
||||
else
|
||||
input i
|
||||
|
||||
let generate n mode =
|
||||
let iarray = "I"
|
||||
and oarray = "O"
|
||||
and istride = "is"
|
||||
and ostride = "os"
|
||||
and i = "i"
|
||||
and v = "v"
|
||||
in
|
||||
|
||||
let sign = !Genutil.sign
|
||||
and name = !Magic.codelet_name in
|
||||
|
||||
let vistride = either_stride (!uistride) (C.SVar istride)
|
||||
and vostride = either_stride (!uostride) (C.SVar ostride)
|
||||
in
|
||||
|
||||
let sovs = stride_to_string "ovs" !uovstride in
|
||||
let sivs = stride_to_string "ivs" !uivstride in
|
||||
|
||||
let (transform, load_input, store_output, si1,si2,so1,so2) = match mode with
|
||||
| RDFT -> Trig.rdft sign, load_array_r, store_array_hc, -1,-1,-1,-1
|
||||
| HDFT -> Trig.hdft sign, load_array_c, store_array_r, -1,-1,-1,-1 (* TODO *)
|
||||
| DHT -> Trig.dht 1, load_array_r, store_array_r, -1,-1,-1,-1
|
||||
| REDFT00 -> Trig.dctI, load_array_r, store_array_r, 0,n-1,0,n-1
|
||||
| REDFT10 -> Trig.dctII, load_array_r, store_array_r, -1,-1,0,-1
|
||||
| REDFT01 -> Trig.dctIII, load_array_r, store_array_r, 0,-1,-1,-1
|
||||
| REDFT11 -> Trig.dctIV, load_array_r, store_array_r, -1,-1,-1,-1
|
||||
| RODFT00 -> Trig.dstI, load_array_r, store_array_r, -1,-1,-1,-1
|
||||
| RODFT10 -> Trig.dstII, load_array_r, store_array_r, -1,-1,n-1,-1
|
||||
| RODFT01 -> Trig.dstIII, load_array_r, store_array_r, n-1,-1,-1,-1
|
||||
| RODFT11 -> Trig.dstIV, load_array_r, store_array_r, -1,-1,-1,-1
|
||||
| _ -> failwith "must specify transform kind"
|
||||
in
|
||||
|
||||
let locations = unique_array_c n in
|
||||
let input = locative_array_c n
|
||||
(C.array_subscript iarray vistride)
|
||||
(C.array_subscript "BUG" vistride)
|
||||
locations sivs in
|
||||
let output = rescale sqrt_half so1 so2
|
||||
((Complex.times (Complex.inverse_int_sqrt !normsqr))
|
||||
@@ (transform n (rescale sqrt_two si1 si2 (load_array_c n input)))) in
|
||||
let oloc =
|
||||
locative_array_c n
|
||||
(C.array_subscript oarray vostride)
|
||||
(C.array_subscript "BUG" vostride)
|
||||
locations sovs in
|
||||
let odag = store_output n oloc output in
|
||||
let annot = standard_optimizer odag in
|
||||
|
||||
let body = if !noloop then Block([], [Asch annot]) else Block (
|
||||
[Decl ("INT", i)],
|
||||
[For (Expr_assign (CVar i, CVar v),
|
||||
Binop (" > ", CVar i, Integer 0),
|
||||
list_to_comma
|
||||
[Expr_assign (CVar i, CPlus [CVar i; CUminus (Integer 1)]);
|
||||
Expr_assign (CVar iarray, CPlus [CVar iarray; CVar sivs]);
|
||||
Expr_assign (CVar oarray, CPlus [CVar oarray; CVar sovs]);
|
||||
make_volatile_stride (2*n) (CVar istride);
|
||||
make_volatile_stride (2*n) (CVar ostride)
|
||||
],
|
||||
Asch annot)
|
||||
])
|
||||
in
|
||||
|
||||
let tree =
|
||||
Fcn ((if !Magic.standalone then "void" else "static void"), name,
|
||||
([Decl (C.constrealtypep, iarray);
|
||||
Decl (C.realtypep, oarray)]
|
||||
@ (if stride_fixed !uistride then []
|
||||
else [Decl (C.stridetype, istride)])
|
||||
@ (if stride_fixed !uostride then []
|
||||
else [Decl (C.stridetype, ostride)])
|
||||
@ (if !noloop then [] else
|
||||
[Decl ("INT", v)]
|
||||
@ (if stride_fixed !uivstride then []
|
||||
else [Decl ("INT", "ivs")])
|
||||
@ (if stride_fixed !uovstride then []
|
||||
else [Decl ("INT", "ovs")]))),
|
||||
finalize_fcn body)
|
||||
|
||||
in let desc =
|
||||
Printf.sprintf
|
||||
"static const kr2r_desc desc = { %d, \"%s\", %s, &GENUS, %s };\n\n"
|
||||
n name (flops_of tree)
|
||||
(match mode with
|
||||
| RDFT -> "RDFT00"
|
||||
| HDFT -> "HDFT00"
|
||||
| DHT -> "DHT"
|
||||
| REDFT00 -> "REDFT00"
|
||||
| REDFT10 -> "REDFT10"
|
||||
| REDFT01 -> "REDFT01"
|
||||
| REDFT11 -> "REDFT11"
|
||||
| RODFT00 -> "RODFT00"
|
||||
| RODFT10 -> "RODFT10"
|
||||
| RODFT01 -> "RODFT01"
|
||||
| RODFT11 -> "RODFT11"
|
||||
| _ -> failwith "must specify a transform kind")
|
||||
|
||||
and init =
|
||||
(declare_register_fcn name) ^
|
||||
"{" ^
|
||||
" X(kr2r_register)(p, " ^ name ^ ", &desc);\n" ^
|
||||
"}\n"
|
||||
|
||||
in
|
||||
(unparse tree) ^ "\n" ^ (if !Magic.standalone then "" else desc ^ init)
|
||||
|
||||
|
||||
let main () =
|
||||
begin
|
||||
parse speclist usage;
|
||||
print_string (generate (check_size ()) !mode);
|
||||
end
|
||||
|
||||
let _ = main()
|
||||
161
fftw-3.3.10/genfft/gen_twiddle.ml
Normal file
161
fftw-3.3.10/genfft/gen_twiddle.ml
Normal file
@@ -0,0 +1,161 @@
|
||||
(*
|
||||
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*)
|
||||
|
||||
open Util
|
||||
open Genutil
|
||||
open C
|
||||
|
||||
|
||||
type ditdif = DIT | DIF
|
||||
let ditdif = ref DIT
|
||||
let usage = "Usage: " ^ Sys.argv.(0) ^ " -n <number> [ -dit | -dif ]"
|
||||
|
||||
let urs = ref Stride_variable
|
||||
let ums = ref Stride_variable
|
||||
|
||||
let speclist = [
|
||||
"-dit",
|
||||
Arg.Unit(fun () -> ditdif := DIT),
|
||||
" generate a DIT codelet";
|
||||
|
||||
"-dif",
|
||||
Arg.Unit(fun () -> ditdif := DIF),
|
||||
" generate a DIF codelet";
|
||||
|
||||
"-with-rs",
|
||||
Arg.String(fun x -> urs := arg_to_stride x),
|
||||
" specialize for given i/o stride";
|
||||
|
||||
"-with-ms",
|
||||
Arg.String(fun x -> ums := arg_to_stride x),
|
||||
" specialize for given ms"
|
||||
]
|
||||
|
||||
let generate n =
|
||||
let rioarray = "ri"
|
||||
and iioarray = "ii"
|
||||
and rs = "rs"
|
||||
and twarray = "W"
|
||||
and m = "m" and mb = "mb" and me = "me" and ms = "ms" in
|
||||
|
||||
let sign = !Genutil.sign
|
||||
and name = !Magic.codelet_name
|
||||
and byvl x = choose_simd x (ctimes (CVar "(2 * VL)", x)) in
|
||||
let ename = expand_name name in
|
||||
|
||||
let (bytwiddle, num_twiddles, twdesc) = Twiddle.twiddle_policy 0 false in
|
||||
let nt = num_twiddles n in
|
||||
|
||||
let byw = bytwiddle n sign (twiddle_array nt twarray) in
|
||||
|
||||
let vrs = either_stride (!urs) (C.SVar rs) in
|
||||
let sms = stride_to_string "ms" !ums in
|
||||
|
||||
let locations = unique_array_c n in
|
||||
let iloc =
|
||||
locative_array_c n
|
||||
(C.array_subscript rioarray vrs)
|
||||
(C.array_subscript iioarray vrs)
|
||||
locations sms
|
||||
and oloc =
|
||||
locative_array_c n
|
||||
(C.array_subscript rioarray vrs)
|
||||
(C.array_subscript iioarray vrs)
|
||||
locations sms
|
||||
in
|
||||
let liloc = load_array_c n iloc in
|
||||
let output =
|
||||
match !ditdif with
|
||||
| DIT -> array n (Fft.dft sign n (byw liloc))
|
||||
| DIF -> array n (byw (Fft.dft sign n liloc))
|
||||
in
|
||||
let odag = store_array_c n oloc output in
|
||||
let annot = standard_optimizer odag in
|
||||
|
||||
let vm = CVar m and vmb = CVar mb and vme = CVar me in
|
||||
|
||||
let body = Block (
|
||||
[Decl ("INT", m)],
|
||||
[For (list_to_comma
|
||||
[Expr_assign (vm, vmb);
|
||||
Expr_assign (CVar twarray,
|
||||
CPlus [CVar twarray;
|
||||
ctimes (vmb, Integer nt)])],
|
||||
Binop (" < ", vm, vme),
|
||||
list_to_comma
|
||||
[Expr_assign (vm, CPlus [vm; byvl (Integer 1)]);
|
||||
Expr_assign (CVar rioarray, CPlus [CVar rioarray;
|
||||
byvl (CVar sms)]);
|
||||
Expr_assign (CVar iioarray, CPlus [CVar iioarray;
|
||||
byvl (CVar sms)]);
|
||||
Expr_assign (CVar twarray, CPlus [CVar twarray;
|
||||
byvl (Integer nt)]);
|
||||
make_volatile_stride (2*n) (CVar rs)
|
||||
],
|
||||
Asch annot)])
|
||||
in
|
||||
|
||||
let tree =
|
||||
Fcn (((if !Magic.standalone then "" else "static ") ^ "void"),
|
||||
ename,
|
||||
[Decl (C.realtypep, rioarray);
|
||||
Decl (C.realtypep, iioarray);
|
||||
Decl (C.constrealtypep, twarray);
|
||||
Decl (C.stridetype, rs);
|
||||
Decl ("INT", mb);
|
||||
Decl ("INT", me);
|
||||
Decl ("INT", ms)],
|
||||
finalize_fcn body)
|
||||
in
|
||||
let twinstr =
|
||||
Printf.sprintf "static const tw_instr twinstr[] = %s;\n\n"
|
||||
(twinstr_to_string "(2 * VL)" (twdesc n))
|
||||
and desc =
|
||||
Printf.sprintf
|
||||
"static const ct_desc desc = {%d, %s, twinstr, &GENUS, %s, %s, %s, %s};\n\n"
|
||||
n (stringify name) (flops_of tree)
|
||||
(stride_to_solverparm !urs) "0"
|
||||
(stride_to_solverparm !ums)
|
||||
and register =
|
||||
match !ditdif with
|
||||
| DIT -> "X(kdft_dit_register)"
|
||||
| DIF -> "X(kdft_dif_register)"
|
||||
|
||||
in
|
||||
let init =
|
||||
"\n" ^
|
||||
twinstr ^
|
||||
desc ^
|
||||
(declare_register_fcn name) ^
|
||||
(Printf.sprintf "{\n%s(p, %s, &desc);\n}" register ename)
|
||||
in
|
||||
|
||||
(unparse tree) ^ "\n" ^
|
||||
(if !Magic.standalone then "" else init)
|
||||
|
||||
|
||||
let main () =
|
||||
begin
|
||||
parse (speclist @ Twiddle.speclist) usage;
|
||||
print_string (generate (check_size ()));
|
||||
end
|
||||
|
||||
let _ = main()
|
||||
165
fftw-3.3.10/genfft/gen_twiddle_c.ml
Normal file
165
fftw-3.3.10/genfft/gen_twiddle_c.ml
Normal file
@@ -0,0 +1,165 @@
|
||||
(*
|
||||
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*)
|
||||
|
||||
open Util
|
||||
open Genutil
|
||||
open C
|
||||
|
||||
|
||||
type ditdif = DIT | DIF
|
||||
let ditdif = ref DIT
|
||||
let usage = "Usage: " ^ Sys.argv.(0) ^ " -n <number> [ -dit | -dif ]"
|
||||
|
||||
let urs = ref Stride_variable
|
||||
let ums = ref Stride_variable
|
||||
|
||||
let speclist = [
|
||||
"-dit",
|
||||
Arg.Unit(fun () -> ditdif := DIT),
|
||||
" generate a DIT codelet";
|
||||
|
||||
"-dif",
|
||||
Arg.Unit(fun () -> ditdif := DIF),
|
||||
" generate a DIF codelet";
|
||||
|
||||
"-with-rs",
|
||||
Arg.String(fun x -> urs := arg_to_stride x),
|
||||
" specialize for given i/o stride";
|
||||
|
||||
"-with-ms",
|
||||
Arg.String(fun x -> ums := arg_to_stride x),
|
||||
" specialize for given ms"
|
||||
]
|
||||
|
||||
let generate n =
|
||||
let rioarray = "x"
|
||||
and rs = "rs"
|
||||
and twarray = "W"
|
||||
and m = "m" and mb = "mb" and me = "me" and ms = "ms" in
|
||||
|
||||
let sign = !Genutil.sign
|
||||
and name = !Magic.codelet_name
|
||||
and byvl x = choose_simd x (ctimes (CVar "VL", x))
|
||||
and bytwvl x = choose_simd x (ctimes (CVar "TWVL", x))
|
||||
and bytwvl_vl x = choose_simd x (ctimes (CVar "(TWVL/VL)", x)) in
|
||||
let ename = expand_name name in
|
||||
|
||||
let (bytwiddle, num_twiddles, twdesc) = Twiddle.twiddle_policy 0 true in
|
||||
let nt = num_twiddles n in
|
||||
|
||||
let byw = bytwiddle n sign (twiddle_array nt twarray) in
|
||||
|
||||
let vrs = either_stride (!urs) (C.SVar rs) in
|
||||
let sms = stride_to_string "ms" !ums in
|
||||
|
||||
let locations = unique_array_c n in
|
||||
let iloc =
|
||||
locative_array_c n
|
||||
(C.array_subscript rioarray vrs)
|
||||
(C.array_subscript "BUG" vrs)
|
||||
locations sms
|
||||
and oloc =
|
||||
locative_array_c n
|
||||
(C.array_subscript rioarray vrs)
|
||||
(C.array_subscript "BUG" vrs)
|
||||
locations sms
|
||||
in
|
||||
let liloc = load_array_r n iloc in
|
||||
let fft = Trig.dft_via_rdft in
|
||||
let output =
|
||||
match !ditdif with
|
||||
| DIT -> array n (fft sign n (byw liloc))
|
||||
| DIF -> array n (byw (fft sign n liloc))
|
||||
in
|
||||
let odag = store_array_r n oloc output in
|
||||
let annot = standard_optimizer odag in
|
||||
|
||||
let vm = CVar m and vmb = CVar mb and vme = CVar me in
|
||||
|
||||
let body = Block (
|
||||
[Decl ("INT", m);
|
||||
Decl (C.realtypep, rioarray)],
|
||||
[Stmt_assign (CVar rioarray,
|
||||
CVar (if (sign < 0) then "ri" else "ii"));
|
||||
For (list_to_comma
|
||||
[Expr_assign (vm, vmb);
|
||||
Expr_assign (CVar twarray,
|
||||
CPlus [CVar twarray;
|
||||
ctimes (vmb,
|
||||
bytwvl_vl (Integer nt))])],
|
||||
Binop (" < ", vm, vme),
|
||||
list_to_comma
|
||||
[Expr_assign (vm, CPlus [vm; byvl (Integer 1)]);
|
||||
Expr_assign (CVar rioarray, CPlus [CVar rioarray;
|
||||
byvl (CVar sms)]);
|
||||
Expr_assign (CVar twarray, CPlus [CVar twarray;
|
||||
bytwvl (Integer nt)]);
|
||||
make_volatile_stride n (CVar rs)
|
||||
],
|
||||
Asch annot)])
|
||||
in
|
||||
|
||||
let tree =
|
||||
Fcn (((if !Magic.standalone then "" else "static ") ^ "void"),
|
||||
ename,
|
||||
[Decl (C.realtypep, "ri");
|
||||
Decl (C.realtypep, "ii");
|
||||
Decl (C.constrealtypep, twarray);
|
||||
Decl (C.stridetype, rs);
|
||||
Decl ("INT", mb);
|
||||
Decl ("INT", me);
|
||||
Decl ("INT", ms)],
|
||||
finalize_fcn body)
|
||||
in
|
||||
let twinstr =
|
||||
Printf.sprintf "static const tw_instr twinstr[] = %s;\n\n"
|
||||
(twinstr_to_string "VL" (twdesc n))
|
||||
and desc =
|
||||
Printf.sprintf
|
||||
"static const ct_desc desc = {%d, %s, twinstr, &GENUS, %s, %s, %s, %s};\n\n"
|
||||
n (stringify name) (flops_of tree)
|
||||
(stride_to_solverparm !urs) "0"
|
||||
(stride_to_solverparm !ums)
|
||||
and register =
|
||||
match !ditdif with
|
||||
| DIT -> "X(kdft_dit_register)"
|
||||
| DIF -> "X(kdft_dif_register)"
|
||||
|
||||
in
|
||||
let init =
|
||||
"\n" ^
|
||||
twinstr ^
|
||||
desc ^
|
||||
(declare_register_fcn name) ^
|
||||
(Printf.sprintf "{\n%s(p, %s, &desc);\n}" register ename)
|
||||
in
|
||||
|
||||
(unparse tree) ^ "\n" ^ (if !Magic.standalone then "" else init)
|
||||
|
||||
|
||||
let main () =
|
||||
begin
|
||||
Simdmagic.simd_mode := true;
|
||||
parse (speclist @ Twiddle.speclist) usage;
|
||||
print_string (generate (check_size ()));
|
||||
end
|
||||
|
||||
let _ = main()
|
||||
176
fftw-3.3.10/genfft/gen_twidsq.ml
Normal file
176
fftw-3.3.10/genfft/gen_twidsq.ml
Normal file
@@ -0,0 +1,176 @@
|
||||
(*
|
||||
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*)
|
||||
|
||||
open Util
|
||||
open Genutil
|
||||
open C
|
||||
|
||||
type ditdif = DIT | DIF
|
||||
let ditdif = ref DIT
|
||||
|
||||
let usage = "Usage: " ^ Sys.argv.(0) ^ " -n <number> [ -dit | -dif ]"
|
||||
|
||||
let reload_twiddle = ref false
|
||||
|
||||
let urs = ref Stride_variable
|
||||
let uvs = ref Stride_variable
|
||||
let ums = ref Stride_variable
|
||||
|
||||
let speclist = [
|
||||
"-dit",
|
||||
Arg.Unit(fun () -> ditdif := DIT),
|
||||
" generate a DIT codelet";
|
||||
|
||||
"-dif",
|
||||
Arg.Unit(fun () -> ditdif := DIF),
|
||||
" generate a DIF codelet";
|
||||
|
||||
"-reload-twiddle",
|
||||
Arg.Unit(fun () -> reload_twiddle := true),
|
||||
" do not collect common twiddle factors";
|
||||
|
||||
"-with-rs",
|
||||
Arg.String(fun x -> urs := arg_to_stride x),
|
||||
" specialize for given input stride";
|
||||
|
||||
"-with-vs",
|
||||
Arg.String(fun x -> uvs := arg_to_stride x),
|
||||
" specialize for given vector stride";
|
||||
|
||||
"-with-ms",
|
||||
Arg.String(fun x -> ums := arg_to_stride x),
|
||||
" specialize for given ms"
|
||||
]
|
||||
|
||||
let generate n =
|
||||
let rioarray = "rio"
|
||||
and iioarray = "iio"
|
||||
and rs = "rs" and vs = "vs"
|
||||
and twarray = "W"
|
||||
and m = "m" and mb = "mb" and me = "me" and ms = "ms" in
|
||||
|
||||
let sign = !Genutil.sign
|
||||
and name = !Magic.codelet_name in
|
||||
|
||||
let (bytwiddle, num_twiddles, twdesc) = Twiddle.twiddle_policy 0 false in
|
||||
let nt = num_twiddles n in
|
||||
|
||||
let svs = either_stride (!uvs) (C.SVar vs)
|
||||
and srs = either_stride (!urs) (C.SVar rs) in
|
||||
|
||||
let byw =
|
||||
if !reload_twiddle then
|
||||
array n (fun v -> bytwiddle n sign (twiddle_array nt twarray))
|
||||
else
|
||||
let a = bytwiddle n sign (twiddle_array nt twarray)
|
||||
in fun v -> a
|
||||
in
|
||||
|
||||
let locations = unique_v_array_c n n in
|
||||
|
||||
let ioi =
|
||||
locative_v_array_c n n
|
||||
(C.varray_subscript rioarray svs srs)
|
||||
(C.varray_subscript iioarray svs srs)
|
||||
locations "BUG"
|
||||
and ioo =
|
||||
locative_v_array_c n n
|
||||
(C.varray_subscript rioarray svs srs)
|
||||
(C.varray_subscript iioarray svs srs)
|
||||
locations "BUG"
|
||||
in
|
||||
|
||||
let lioi = load_v_array_c n n ioi in
|
||||
let output =
|
||||
match !ditdif with
|
||||
| DIT -> array n (fun v -> Fft.dft sign n (byw v (lioi v)))
|
||||
| DIF -> array n (fun v -> byw v (Fft.dft sign n (lioi v)))
|
||||
in
|
||||
|
||||
let odag = store_v_array_c n n ioo (transpose output) in
|
||||
let annot = standard_optimizer odag in
|
||||
|
||||
let vm = CVar m and vmb = CVar mb and vme = CVar me in
|
||||
|
||||
let body = Block (
|
||||
[Decl ("INT", m)],
|
||||
[For (list_to_comma
|
||||
[Expr_assign (vm, vmb);
|
||||
Expr_assign (CVar twarray,
|
||||
CPlus [CVar twarray;
|
||||
ctimes (vmb, Integer nt)])],
|
||||
Binop (" < ", vm, vme),
|
||||
list_to_comma
|
||||
[Expr_assign (vm, CPlus [vm; Integer 1]);
|
||||
Expr_assign (CVar rioarray, CPlus [CVar rioarray; CVar ms]);
|
||||
Expr_assign (CVar iioarray, CPlus [CVar iioarray; CVar ms]);
|
||||
Expr_assign (CVar twarray, CPlus [CVar twarray; Integer nt]);
|
||||
make_volatile_stride (2*n) (CVar rs);
|
||||
make_volatile_stride (2*0) (CVar vs)
|
||||
],
|
||||
Asch annot)]) in
|
||||
|
||||
let tree =
|
||||
Fcn (("static void"), name,
|
||||
[Decl (C.realtypep, rioarray);
|
||||
Decl (C.realtypep, iioarray);
|
||||
Decl (C.constrealtypep, twarray);
|
||||
Decl (C.stridetype, rs);
|
||||
Decl (C.stridetype, vs);
|
||||
Decl ("INT", mb);
|
||||
Decl ("INT", me);
|
||||
Decl ("INT", ms)],
|
||||
finalize_fcn body)
|
||||
in
|
||||
let twinstr =
|
||||
Printf.sprintf "static const tw_instr twinstr[] = %s;\n\n"
|
||||
(Twiddle.twinstr_to_c_string (twdesc n))
|
||||
|
||||
and desc =
|
||||
Printf.sprintf
|
||||
"static const ct_desc desc = {%d, \"%s\", twinstr, &GENUS, %s, %s, %s, %s};\n\n"
|
||||
n name (flops_of tree)
|
||||
(stride_to_solverparm !urs) (stride_to_solverparm !uvs)
|
||||
(stride_to_solverparm !ums)
|
||||
|
||||
and register =
|
||||
match !ditdif with
|
||||
| DIT -> "X(kdft_ditsq_register)"
|
||||
| DIF -> "X(kdft_difsq_register)"
|
||||
in
|
||||
let init =
|
||||
"\n" ^
|
||||
twinstr ^
|
||||
desc ^
|
||||
(declare_register_fcn name) ^
|
||||
(Printf.sprintf "{\n%s(p, %s, &desc);\n}" register name)
|
||||
in
|
||||
|
||||
(unparse tree) ^ "\n" ^ init
|
||||
|
||||
|
||||
let main () =
|
||||
begin
|
||||
parse (speclist @ Twiddle.speclist) usage;
|
||||
print_string (generate (check_size ()));
|
||||
end
|
||||
|
||||
let _ = main()
|
||||
187
fftw-3.3.10/genfft/gen_twidsq_c.ml
Normal file
187
fftw-3.3.10/genfft/gen_twidsq_c.ml
Normal file
@@ -0,0 +1,187 @@
|
||||
(*
|
||||
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*)
|
||||
|
||||
open Util
|
||||
open Genutil
|
||||
open C
|
||||
|
||||
type ditdif = DIT | DIF
|
||||
let ditdif = ref DIT
|
||||
|
||||
let usage = "Usage: " ^ Sys.argv.(0) ^ " -n <number> [ -dit | -dif ]"
|
||||
|
||||
let reload_twiddle = ref false
|
||||
|
||||
let urs = ref Stride_variable
|
||||
let uvs = ref Stride_variable
|
||||
let ums = ref Stride_variable
|
||||
|
||||
let speclist = [
|
||||
"-dit",
|
||||
Arg.Unit(fun () -> ditdif := DIT),
|
||||
" generate a DIT codelet";
|
||||
|
||||
"-dif",
|
||||
Arg.Unit(fun () -> ditdif := DIF),
|
||||
" generate a DIF codelet";
|
||||
|
||||
"-reload-twiddle",
|
||||
Arg.Unit(fun () -> reload_twiddle := true),
|
||||
" do not collect common twiddle factors";
|
||||
|
||||
"-with-rs",
|
||||
Arg.String(fun x -> urs := arg_to_stride x),
|
||||
" specialize for given input stride";
|
||||
|
||||
"-with-vs",
|
||||
Arg.String(fun x -> uvs := arg_to_stride x),
|
||||
" specialize for given vector stride";
|
||||
|
||||
"-with-ms",
|
||||
Arg.String(fun x -> ums := arg_to_stride x),
|
||||
" specialize for given ms"
|
||||
]
|
||||
|
||||
let generate n =
|
||||
let rioarray = "x"
|
||||
and rs = "rs" and vs = "vs"
|
||||
and twarray = "W"
|
||||
and m = "m" and mb = "mb" and me = "me" and ms = "ms" in
|
||||
|
||||
let sign = !Genutil.sign
|
||||
and name = !Magic.codelet_name
|
||||
and byvl x = choose_simd x (ctimes (CVar "VL", x))
|
||||
and bytwvl x = choose_simd x (ctimes (CVar "TWVL", x))
|
||||
and bytwvl_vl x = choose_simd x (ctimes (CVar "(TWVL/VL)", x)) in
|
||||
let ename = expand_name name in
|
||||
|
||||
let (bytwiddle, num_twiddles, twdesc) = Twiddle.twiddle_policy 0 true in
|
||||
let nt = num_twiddles n in
|
||||
|
||||
let svs = either_stride (!uvs) (C.SVar vs)
|
||||
and srs = either_stride (!urs) (C.SVar rs) in
|
||||
let sms = stride_to_string "ms" !ums in
|
||||
|
||||
let byw =
|
||||
if !reload_twiddle then
|
||||
array n (fun v -> bytwiddle n sign (twiddle_array nt twarray))
|
||||
else
|
||||
let a = bytwiddle n sign (twiddle_array nt twarray)
|
||||
in fun v -> a
|
||||
in
|
||||
|
||||
let locations = unique_v_array_c n n in
|
||||
|
||||
let ioi =
|
||||
locative_v_array_c n n
|
||||
(C.varray_subscript rioarray svs srs)
|
||||
(C.varray_subscript "BUG" svs srs)
|
||||
locations sms
|
||||
and ioo =
|
||||
locative_v_array_c n n
|
||||
(C.varray_subscript rioarray svs srs)
|
||||
(C.varray_subscript "BUG" svs srs)
|
||||
locations sms
|
||||
in
|
||||
|
||||
let lioi = load_v_array_c n n ioi in
|
||||
let fft = Trig.dft_via_rdft in
|
||||
let output =
|
||||
match !ditdif with
|
||||
| DIT -> array n (fun v -> fft sign n (byw v (lioi v)))
|
||||
| DIF -> array n (fun v -> byw v (fft sign n (lioi v)))
|
||||
in
|
||||
|
||||
let odag = store_v_array_c n n ioo (transpose output) in
|
||||
let annot = standard_optimizer odag in
|
||||
|
||||
let vm = CVar m and vmb = CVar mb and vme = CVar me in
|
||||
|
||||
let body = Block (
|
||||
[Decl ("INT", m);
|
||||
Decl (C.realtypep, rioarray)],
|
||||
[Stmt_assign (CVar rioarray,
|
||||
CVar (if (sign < 0) then "ri" else "ii"));
|
||||
For (list_to_comma
|
||||
[Expr_assign (vm, vmb);
|
||||
Expr_assign (CVar twarray,
|
||||
CPlus [CVar twarray;
|
||||
ctimes (vmb,
|
||||
bytwvl_vl (Integer nt))])],
|
||||
Binop (" < ", vm, vme),
|
||||
list_to_comma
|
||||
[Expr_assign (vm, CPlus [vm; byvl (Integer 1)]);
|
||||
Expr_assign (CVar rioarray, CPlus [CVar rioarray;
|
||||
byvl (CVar sms)]);
|
||||
Expr_assign (CVar twarray, CPlus [CVar twarray;
|
||||
bytwvl (Integer nt)]);
|
||||
make_volatile_stride (2*n) (CVar rs);
|
||||
make_volatile_stride (2*n) (CVar vs)
|
||||
],
|
||||
Asch annot)]) in
|
||||
|
||||
let tree =
|
||||
Fcn (("static void"), ename,
|
||||
[Decl (C.realtypep, "ri");
|
||||
Decl (C.realtypep, "ii");
|
||||
Decl (C.constrealtypep, twarray);
|
||||
Decl (C.stridetype, rs);
|
||||
Decl (C.stridetype, vs);
|
||||
Decl ("INT", mb);
|
||||
Decl ("INT", me);
|
||||
Decl ("INT", ms)],
|
||||
finalize_fcn body)
|
||||
in
|
||||
let twinstr =
|
||||
Printf.sprintf "static const tw_instr twinstr[] = %s;\n\n"
|
||||
(twinstr_to_string "VL" (twdesc n))
|
||||
|
||||
and desc =
|
||||
Printf.sprintf
|
||||
"static const ct_desc desc = {%d, %s, twinstr, &GENUS, %s, %s, %s, %s};\n\n"
|
||||
n (stringify name) (flops_of tree)
|
||||
(stride_to_solverparm !urs)
|
||||
(stride_to_solverparm !uvs)
|
||||
(stride_to_solverparm !ums)
|
||||
|
||||
and register =
|
||||
match !ditdif with
|
||||
| DIT -> "X(kdft_ditsq_register)"
|
||||
| DIF -> "X(kdft_difsq_register)"
|
||||
in
|
||||
let init =
|
||||
"\n" ^
|
||||
twinstr ^
|
||||
desc ^
|
||||
(declare_register_fcn name) ^
|
||||
(Printf.sprintf "{\n%s(p, %s, &desc);\n}" register ename)
|
||||
in
|
||||
|
||||
(unparse tree) ^ "\n" ^ init
|
||||
|
||||
|
||||
let main () =
|
||||
begin
|
||||
parse (speclist @ Twiddle.speclist) usage;
|
||||
print_string (generate (check_size ()));
|
||||
end
|
||||
|
||||
let _ = main()
|
||||
328
fftw-3.3.10/genfft/genutil.ml
Normal file
328
fftw-3.3.10/genfft/genutil.ml
Normal file
@@ -0,0 +1,328 @@
|
||||
(*
|
||||
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*)
|
||||
|
||||
(* utilities common to all generators *)
|
||||
open Util
|
||||
|
||||
let choose_simd a b = if !Simdmagic.simd_mode then b else a
|
||||
|
||||
let unique_array n = array n (fun _ -> Unique.make ())
|
||||
let unique_array_c n =
|
||||
array n (fun _ ->
|
||||
(Unique.make (), Unique.make ()))
|
||||
|
||||
let unique_v_array_c veclen n =
|
||||
array veclen (fun _ ->
|
||||
unique_array_c n)
|
||||
|
||||
let locative_array_c n rarr iarr loc vs =
|
||||
array n (fun i ->
|
||||
let klass = Unique.make () in
|
||||
let (rloc, iloc) = loc i in
|
||||
(Variable.make_locative rloc klass rarr i vs,
|
||||
Variable.make_locative iloc klass iarr i vs))
|
||||
|
||||
let locative_v_array_c veclen n rarr iarr loc vs =
|
||||
array veclen (fun v ->
|
||||
array n (fun i ->
|
||||
let klass = Unique.make () in
|
||||
let (rloc, iloc) = loc v i in
|
||||
(Variable.make_locative rloc klass (rarr v) i vs,
|
||||
Variable.make_locative iloc klass (iarr v) i vs)))
|
||||
|
||||
let temporary_array n =
|
||||
array n (fun i -> Variable.make_temporary ())
|
||||
|
||||
let temporary_array_c n =
|
||||
let tmpr = temporary_array n
|
||||
and tmpi = temporary_array n
|
||||
in
|
||||
array n (fun i -> (tmpr i, tmpi i))
|
||||
|
||||
let temporary_v_array_c veclen n =
|
||||
array veclen (fun v -> temporary_array_c n)
|
||||
|
||||
let temporary_array_c n =
|
||||
let tmpr = temporary_array n
|
||||
and tmpi = temporary_array n
|
||||
in
|
||||
array n (fun i -> (tmpr i, tmpi i))
|
||||
|
||||
let load_c (vr, vi) = Complex.make (Expr.Load vr, Expr.Load vi)
|
||||
let load_r (vr, vi) = Complex.make (Expr.Load vr, Expr.Num (Number.zero))
|
||||
|
||||
let twiddle_array nt w =
|
||||
array (nt/2) (fun i ->
|
||||
let stride = choose_simd (C.SInteger 1) (C.SConst "TWVL")
|
||||
and klass = Unique.make () in
|
||||
let (refr, refi) = (C.array_subscript w stride (2 * i),
|
||||
C.array_subscript w stride (2 * i + 1))
|
||||
in
|
||||
let (kr, ki) = (Variable.make_constant klass refr,
|
||||
Variable.make_constant klass refi)
|
||||
in
|
||||
load_c (kr, ki))
|
||||
|
||||
|
||||
let load_array_c n var = array n (fun i -> load_c (var i))
|
||||
let load_array_r n var = array n (fun i -> load_r (var i))
|
||||
let load_array_hc n var =
|
||||
array n (fun i ->
|
||||
if (i < n - i) then
|
||||
load_c (var i)
|
||||
else if (i > n - i) then
|
||||
Complex.times Complex.i (load_c (var (n - i)))
|
||||
else
|
||||
load_r (var i))
|
||||
|
||||
let load_v_array_c veclen n var =
|
||||
array veclen (fun v -> load_array_c n (var v))
|
||||
|
||||
let store_c (vr, vi) x = [Complex.store_real vr x; Complex.store_imag vi x]
|
||||
let store_r (vr, vi) x = Complex.store_real vr x
|
||||
let store_i (vr, vi) x = Complex.store_imag vi x
|
||||
|
||||
let assign_array_c n dst src =
|
||||
List.flatten
|
||||
(rmap (iota n)
|
||||
(fun i ->
|
||||
let (ar, ai) = Complex.assign (dst i) (src i)
|
||||
in [ar; ai]))
|
||||
let assign_v_array_c veclen n dst src =
|
||||
List.flatten
|
||||
(rmap (iota veclen)
|
||||
(fun v ->
|
||||
assign_array_c n (dst v) (src v)))
|
||||
|
||||
let vassign_v_array_c veclen n dst src =
|
||||
List.flatten
|
||||
(rmap (iota n) (fun i ->
|
||||
List.flatten
|
||||
(rmap (iota veclen)
|
||||
(fun v ->
|
||||
let (ar, ai) = Complex.assign (dst v i) (src v i)
|
||||
in [ar; ai]))))
|
||||
|
||||
let store_array_r n dst src =
|
||||
rmap (iota n)
|
||||
(fun i -> store_r (dst i) (src i))
|
||||
|
||||
let store_array_c n dst src =
|
||||
List.flatten
|
||||
(rmap (iota n)
|
||||
(fun i -> store_c (dst i) (src i)))
|
||||
|
||||
let store_array_hc n dst src =
|
||||
List.flatten
|
||||
(rmap (iota n)
|
||||
(fun i ->
|
||||
if (i < n - i) then
|
||||
store_c (dst i) (src i)
|
||||
else if (i > n - i) then
|
||||
[]
|
||||
else
|
||||
[store_r (dst i) (Complex.real (src i))]))
|
||||
|
||||
|
||||
let store_v_array_c veclen n dst src =
|
||||
List.flatten
|
||||
(rmap (iota veclen)
|
||||
(fun v ->
|
||||
store_array_c n (dst v) (src v)))
|
||||
|
||||
|
||||
let elementwise f n a = array n (fun i -> f (a i))
|
||||
let conj_array_c = elementwise Complex.conj
|
||||
let real_array_c = elementwise Complex.real
|
||||
let imag_array_c = elementwise Complex.imag
|
||||
|
||||
let elementwise_v f veclen n a =
|
||||
array veclen (fun v ->
|
||||
array n (fun i -> f (a v i)))
|
||||
let conj_v_array_c = elementwise_v Complex.conj
|
||||
let real_v_array_c = elementwise_v Complex.real
|
||||
let imag_v_array_c = elementwise_v Complex.imag
|
||||
|
||||
|
||||
let transpose f i j = f j i
|
||||
let symmetrize f i j = if i <= j then f i j else f j i
|
||||
|
||||
(* utilities for command-line parsing *)
|
||||
let standard_arg_parse_fail _ = failwith "too many arguments"
|
||||
|
||||
let dump_dag alist =
|
||||
let fnam = !Magic.dag_dump_file in
|
||||
if (String.length fnam > 0) then
|
||||
let ochan = open_out fnam in
|
||||
begin
|
||||
To_alist.dump (output_string ochan) alist;
|
||||
close_out ochan;
|
||||
end
|
||||
|
||||
let dump_alist alist =
|
||||
let fnam = !Magic.alist_dump_file in
|
||||
if (String.length fnam > 0) then
|
||||
let ochan = open_out fnam in
|
||||
begin
|
||||
Expr.dump (output_string ochan) alist;
|
||||
close_out ochan;
|
||||
end
|
||||
|
||||
let dump_asched asched =
|
||||
let fnam = !Magic.asched_dump_file in
|
||||
if (String.length fnam > 0) then
|
||||
let ochan = open_out fnam in
|
||||
begin
|
||||
Annotate.dump (output_string ochan) asched;
|
||||
close_out ochan;
|
||||
end
|
||||
|
||||
(* utilities for optimization *)
|
||||
let standard_scheduler dag =
|
||||
let optim = Algsimp.algsimp dag in
|
||||
let alist = To_alist.to_assignments optim in
|
||||
let _ = dump_alist alist in
|
||||
let _ = dump_dag alist in
|
||||
if !Magic.precompute_twiddles then
|
||||
Schedule.isolate_precomputations_and_schedule alist
|
||||
else
|
||||
Schedule.schedule alist
|
||||
|
||||
let standard_optimizer dag =
|
||||
let sched = standard_scheduler dag in
|
||||
let annot = Annotate.annotate [] sched in
|
||||
let _ = dump_asched annot in
|
||||
annot
|
||||
|
||||
let size = ref None
|
||||
let sign = ref (-1)
|
||||
|
||||
let speclist = [
|
||||
"-n", Arg.Int(fun i -> size := Some i), " generate a codelet of size <n>";
|
||||
"-sign",
|
||||
Arg.Int(fun i ->
|
||||
if (i > 0) then
|
||||
sign := 1
|
||||
else
|
||||
sign := (-1)),
|
||||
" sign of transform";
|
||||
]
|
||||
|
||||
let check_size () =
|
||||
match !size with
|
||||
| Some i -> i
|
||||
| None -> failwith "must specify -n"
|
||||
|
||||
let expand_name name = if name = "" then "noname" else name
|
||||
|
||||
let declare_register_fcn name =
|
||||
if name = "" then
|
||||
"void NAME(planner *p)\n"
|
||||
else
|
||||
"void " ^ (choose_simd "X" "XSIMD") ^
|
||||
"(codelet_" ^ name ^ ")(planner *p)\n"
|
||||
|
||||
let stringify name =
|
||||
if name = "" then "STRINGIZE(NAME)" else
|
||||
choose_simd ("\"" ^ name ^ "\"")
|
||||
("XSIMD_STRING(\"" ^ name ^ "\")")
|
||||
|
||||
let parse user_speclist usage =
|
||||
Arg.parse
|
||||
(user_speclist @ speclist @ Magic.speclist @ Simdmagic.speclist)
|
||||
standard_arg_parse_fail
|
||||
usage
|
||||
|
||||
let rec list_to_c = function
|
||||
[] -> ""
|
||||
| [a] -> (string_of_int a)
|
||||
| a :: b -> (string_of_int a) ^ ", " ^ (list_to_c b)
|
||||
|
||||
let rec list_to_comma = function
|
||||
| [a; b] -> C.Comma (a, b)
|
||||
| a :: b -> C.Comma (a, list_to_comma b)
|
||||
| _ -> failwith "list_to_comma"
|
||||
|
||||
|
||||
type stride = Stride_variable | Fixed_int of int | Fixed_string of string
|
||||
|
||||
let either_stride a b =
|
||||
match a with
|
||||
Fixed_int x -> C.SInteger x
|
||||
| Fixed_string x -> C.SConst x
|
||||
| _ -> b
|
||||
|
||||
let stride_fixed = function
|
||||
Stride_variable -> false
|
||||
| _ -> true
|
||||
|
||||
let arg_to_stride s =
|
||||
try
|
||||
Fixed_int (int_of_string s)
|
||||
with Failure "int_of_string" ->
|
||||
Fixed_string s
|
||||
|
||||
let stride_to_solverparm = function
|
||||
Stride_variable -> "0"
|
||||
| Fixed_int x -> string_of_int x
|
||||
| Fixed_string x -> x
|
||||
|
||||
let stride_to_string s = function
|
||||
Stride_variable -> s
|
||||
| Fixed_int x -> string_of_int x
|
||||
| Fixed_string x -> x
|
||||
|
||||
(* output the command line *)
|
||||
let cmdline () =
|
||||
List.fold_right (fun a b -> a ^ " " ^ b) (Array.to_list Sys.argv) ""
|
||||
|
||||
let unparse tree =
|
||||
"/* Generated by: " ^ (cmdline ()) ^ "*/\n\n" ^
|
||||
(C.print_cost tree) ^
|
||||
(if String.length !Magic.inklude > 0
|
||||
then
|
||||
(Printf.sprintf "#include \"%s\"\n\n" !Magic.inklude)
|
||||
else "") ^
|
||||
(if !Simdmagic.simd_mode then
|
||||
Simd.unparse_function tree
|
||||
else
|
||||
C.unparse_function tree)
|
||||
|
||||
let finalize_fcn ast =
|
||||
let mergedecls = function
|
||||
C.Block (d1, [C.Block (d2, s)]) -> C.Block (d1 @ d2, s)
|
||||
| x -> x
|
||||
and extract_constants =
|
||||
if !Simdmagic.simd_mode then
|
||||
Simd.extract_constants
|
||||
else
|
||||
C.extract_constants
|
||||
|
||||
in mergedecls (C.Block (extract_constants ast, [ast; C.Simd_leavefun]))
|
||||
|
||||
let twinstr_to_string vl x =
|
||||
if !Simdmagic.simd_mode then
|
||||
Twiddle.twinstr_to_simd_string vl x
|
||||
else
|
||||
Twiddle.twinstr_to_c_string x
|
||||
|
||||
let make_volatile_stride n x =
|
||||
C.CCall ("MAKE_VOLATILE_STRIDE", C.Comma((C.Integer n), x))
|
||||
71
fftw-3.3.10/genfft/littlesimp.ml
Normal file
71
fftw-3.3.10/genfft/littlesimp.ml
Normal file
@@ -0,0 +1,71 @@
|
||||
(*
|
||||
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*)
|
||||
|
||||
(*
|
||||
* The LittleSimplifier module implements a subset of the simplifications
|
||||
* of the AlgSimp module. These simplifications can be executed
|
||||
* quickly here, while they would take a long time using the heavy
|
||||
* machinery of AlgSimp.
|
||||
*
|
||||
* For example, 0 * x is simplified to 0 tout court by the LittleSimplifier.
|
||||
* On the other hand, AlgSimp would first simplify x, generating lots
|
||||
* of common subexpressions, storing them in a table etc, just to
|
||||
* discard all the work later. Similarly, the LittleSimplifier
|
||||
* reduces the constant FFT in Rader's algorithm to a constant sequence.
|
||||
*)
|
||||
|
||||
open Expr
|
||||
|
||||
let rec makeNum = function
|
||||
| n -> Num n
|
||||
|
||||
and makeUminus = function
|
||||
| Uminus a -> a
|
||||
| Num a -> makeNum (Number.negate a)
|
||||
| a -> Uminus a
|
||||
|
||||
and makeTimes = function
|
||||
| (Num a, Num b) -> makeNum (Number.mul a b)
|
||||
| (Num a, Times (Num b, c)) -> makeTimes (makeNum (Number.mul a b), c)
|
||||
| (Num a, b) when Number.is_zero a -> makeNum (Number.zero)
|
||||
| (Num a, b) when Number.is_one a -> b
|
||||
| (Num a, b) when Number.is_mone a -> makeUminus b
|
||||
| (Num a, Uminus b) -> Times (makeUminus (Num a), b)
|
||||
| (a, (Num b as b')) -> makeTimes (b', a)
|
||||
| (a, b) -> Times (a, b)
|
||||
|
||||
and makePlus l =
|
||||
let rec reduceSum x = match x with
|
||||
[] -> []
|
||||
| [Num a] -> if Number.is_zero a then [] else x
|
||||
| (Num a) :: (Num b) :: c ->
|
||||
reduceSum ((makeNum (Number.add a b)) :: c)
|
||||
| ((Num _) as a') :: b :: c -> b :: reduceSum (a' :: c)
|
||||
| a :: s -> a :: reduceSum s
|
||||
|
||||
in match reduceSum l with
|
||||
[] -> makeNum (Number.zero)
|
||||
| [a] -> a
|
||||
| [a; b] when a == b -> makeTimes (Num Number.two, a)
|
||||
| [Times (Num a, b); Times (Num c, d)] when b == d ->
|
||||
makeTimes (makePlus [Num a; Num c], b)
|
||||
| a -> Plus a
|
||||
|
||||
25
fftw-3.3.10/genfft/littlesimp.mli
Normal file
25
fftw-3.3.10/genfft/littlesimp.mli
Normal file
@@ -0,0 +1,25 @@
|
||||
(*
|
||||
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*)
|
||||
|
||||
val makeNum : Number.number -> Expr.expr
|
||||
val makeUminus : Expr.expr -> Expr.expr
|
||||
val makeTimes : Expr.expr * Expr.expr -> Expr.expr
|
||||
val makePlus : Expr.expr list -> Expr.expr
|
||||
161
fftw-3.3.10/genfft/magic.ml
Normal file
161
fftw-3.3.10/genfft/magic.ml
Normal file
@@ -0,0 +1,161 @@
|
||||
(*
|
||||
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*)
|
||||
|
||||
(* magic parameters *)
|
||||
let verbose = ref false
|
||||
let vneg = ref false
|
||||
let karatsuba_min = ref 15
|
||||
let karatsuba_variant = ref 2
|
||||
let circular_min = ref 64
|
||||
let rader_min = ref 13
|
||||
let rader_list = ref [5]
|
||||
let alternate_convolution = ref 17
|
||||
let threemult = ref false
|
||||
let inline_single = ref true
|
||||
let inline_loads = ref false
|
||||
let inline_loads_constants = ref false
|
||||
let inline_constants = ref true
|
||||
let trivial_stores = ref false
|
||||
let locations_are_special = ref false
|
||||
let strength_reduce_mul = ref false
|
||||
let number_of_variables = ref 4
|
||||
let codelet_name = ref "unnamed"
|
||||
let randomized_cse = ref true
|
||||
let dif_split_radix = ref false
|
||||
let enable_fma = ref false
|
||||
let deep_collect_depth = ref 1
|
||||
let schedule_type = ref 0
|
||||
let compact = ref false
|
||||
let dag_dump_file = ref ""
|
||||
let alist_dump_file = ref ""
|
||||
let asched_dump_file = ref ""
|
||||
let lisp_syntax = ref false
|
||||
let network_transposition = ref true
|
||||
let inklude = ref ""
|
||||
let generic_arith = ref false
|
||||
let reorder_insns = ref false
|
||||
let reorder_loads = ref false
|
||||
let reorder_stores = ref false
|
||||
let precompute_twiddles = ref false
|
||||
let newsplit = ref false
|
||||
let standalone = ref false
|
||||
let pipeline_latency = ref 0
|
||||
let schedule_for_pipeline = ref false
|
||||
let generate_bytw = ref true
|
||||
|
||||
(* command-line parser for magic parameters *)
|
||||
let undocumented = " Undocumented voodoo parameter"
|
||||
|
||||
let set_bool var = Arg.Unit (fun () -> var := true)
|
||||
let unset_bool var = Arg.Unit (fun () -> var := false)
|
||||
let set_int var = Arg.Int(fun i -> var := i)
|
||||
let set_string var = Arg.String(fun s -> var := s)
|
||||
|
||||
let speclist = [
|
||||
"-name", set_string codelet_name, " set codelet name";
|
||||
"-standalone", set_bool standalone, " standalone codelet (no desc)";
|
||||
"-include", set_string inklude, undocumented;
|
||||
|
||||
"-verbose", set_bool verbose, " Enable verbose logging messages to stderr";
|
||||
|
||||
"-rader-min", set_int rader_min,
|
||||
"<n> : Use Rader's algorithm for prime sizes >= <n>";
|
||||
|
||||
"-threemult", set_bool threemult,
|
||||
" Use 3-multiply complex multiplications";
|
||||
|
||||
"-karatsuba-min", set_int karatsuba_min, undocumented;
|
||||
"-karatsuba-variant", set_int karatsuba_variant, undocumented;
|
||||
"-circular-min", set_int circular_min, undocumented;
|
||||
|
||||
"-compact", set_bool compact,
|
||||
" Mangle variable names to reduce size of source code";
|
||||
"-no-compact", unset_bool compact,
|
||||
" Disable -compact";
|
||||
|
||||
"-dump-dag", set_string dag_dump_file, undocumented;
|
||||
"-dump-alist", set_string alist_dump_file, undocumented;
|
||||
"-dump-asched", set_string asched_dump_file, undocumented;
|
||||
"-lisp-syntax", set_bool lisp_syntax, undocumented;
|
||||
|
||||
"-alternate-convolution", set_int alternate_convolution, undocumented;
|
||||
"-deep-collect-depth", set_int deep_collect_depth, undocumented;
|
||||
"-schedule-type", set_int schedule_type, undocumented;
|
||||
"-pipeline-latency", set_int pipeline_latency, undocumented;
|
||||
"-schedule-for-pipeline", set_bool schedule_for_pipeline, undocumented;
|
||||
|
||||
"-dif-split-radix", set_bool dif_split_radix, undocumented;
|
||||
"-dit-split-radix", unset_bool dif_split_radix, undocumented;
|
||||
|
||||
"-generic-arith", set_bool generic_arith, undocumented;
|
||||
"-no-generic-arith", unset_bool generic_arith, undocumented;
|
||||
|
||||
"-precompute-twiddles", set_bool precompute_twiddles, undocumented;
|
||||
"-no-precompute-twiddles", unset_bool precompute_twiddles, undocumented;
|
||||
|
||||
"-inline-single", set_bool inline_single, undocumented;
|
||||
"-no-inline-single", unset_bool inline_single, undocumented;
|
||||
|
||||
"-inline-loads", set_bool inline_loads, undocumented;
|
||||
"-no-inline-loads", unset_bool inline_loads, undocumented;
|
||||
|
||||
"-inline-loads-constants", set_bool inline_loads_constants, undocumented;
|
||||
"-no-inline-loads-constants",
|
||||
unset_bool inline_loads_constants, undocumented;
|
||||
|
||||
"-inline-constants", set_bool inline_constants, undocumented;
|
||||
"-no-inline-constants", unset_bool inline_constants, undocumented;
|
||||
|
||||
"-trivial-stores", set_bool trivial_stores, undocumented;
|
||||
"-no-trivial-stores", unset_bool trivial_stores, undocumented;
|
||||
|
||||
"-locations-are-special", set_bool locations_are_special, undocumented;
|
||||
"-no-locations-are-special", unset_bool locations_are_special, undocumented;
|
||||
|
||||
"-randomized-cse", set_bool randomized_cse, undocumented;
|
||||
"-no-randomized-cse", unset_bool randomized_cse, undocumented;
|
||||
|
||||
"-network-transposition", set_bool network_transposition, undocumented;
|
||||
"-no-network-transposition", unset_bool network_transposition, undocumented;
|
||||
|
||||
"-reorder-insns", set_bool reorder_insns, undocumented;
|
||||
"-no-reorder-insns", unset_bool reorder_insns, undocumented;
|
||||
"-reorder-loads", set_bool reorder_loads, undocumented;
|
||||
"-no-reorder-loads", unset_bool reorder_loads, undocumented;
|
||||
"-reorder-stores", set_bool reorder_stores, undocumented;
|
||||
"-no-reorder-stores", unset_bool reorder_stores, undocumented;
|
||||
|
||||
"-newsplit", set_bool newsplit, undocumented;
|
||||
|
||||
"-vneg", set_bool vneg, undocumented;
|
||||
"-fma", set_bool enable_fma, undocumented;
|
||||
"-no-fma", unset_bool enable_fma, undocumented;
|
||||
|
||||
"-variables", set_int number_of_variables, undocumented;
|
||||
|
||||
"-strength-reduce-mul", set_bool strength_reduce_mul, undocumented;
|
||||
"-no-strength-reduce-mul", unset_bool strength_reduce_mul, undocumented;
|
||||
|
||||
"-generate-bytw", set_bool generate_bytw, undocumented;
|
||||
"-no-generate-bytw", unset_bool generate_bytw, undocumented;
|
||||
]
|
||||
|
||||
|
||||
75
fftw-3.3.10/genfft/monads.ml
Normal file
75
fftw-3.3.10/genfft/monads.ml
Normal file
@@ -0,0 +1,75 @@
|
||||
(*
|
||||
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*)
|
||||
|
||||
(*************************************************************
|
||||
* Monads
|
||||
*************************************************************)
|
||||
|
||||
(*
|
||||
* Phil Wadler has many well written papers about monads. See
|
||||
* http://cm.bell-labs.com/cm/cs/who/wadler/
|
||||
*)
|
||||
(* vanilla state monad *)
|
||||
module StateMonad = struct
|
||||
let returnM x = fun s -> (x, s)
|
||||
|
||||
let (>>=) = fun m k ->
|
||||
fun s ->
|
||||
let (a', s') = m s
|
||||
in let (a'', s'') = k a' s'
|
||||
in (a'', s'')
|
||||
|
||||
let (>>) = fun m k ->
|
||||
m >>= fun _ -> k
|
||||
|
||||
let rec mapM f = function
|
||||
[] -> returnM []
|
||||
| a :: b ->
|
||||
f a >>= fun a' ->
|
||||
mapM f b >>= fun b' ->
|
||||
returnM (a' :: b')
|
||||
|
||||
let runM m x initial_state =
|
||||
let (a, _) = m x initial_state
|
||||
in a
|
||||
|
||||
let fetchState =
|
||||
fun s -> s, s
|
||||
|
||||
let storeState newState =
|
||||
fun _ -> (), newState
|
||||
end
|
||||
|
||||
(* monad with built-in memoizing capabilities *)
|
||||
module MemoMonad =
|
||||
struct
|
||||
open StateMonad
|
||||
|
||||
let memoizing lookupM insertM f k =
|
||||
lookupM k >>= fun vMaybe ->
|
||||
match vMaybe with
|
||||
Some value -> returnM value
|
||||
| None ->
|
||||
f k >>= fun value ->
|
||||
insertM k value >> returnM value
|
||||
|
||||
let runM initial_state m x = StateMonad.runM m x initial_state
|
||||
end
|
||||
164
fftw-3.3.10/genfft/number.ml
Normal file
164
fftw-3.3.10/genfft/number.ml
Normal file
@@ -0,0 +1,164 @@
|
||||
(*
|
||||
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*)
|
||||
|
||||
(* The generator keeps track of numeric constants in symbolic
|
||||
expressions using the abstract number type, defined in this file.
|
||||
|
||||
Our implementation of the number type uses arbitrary-precision
|
||||
arithmetic from the built-in Num package in order to maintain an
|
||||
accurate representation of constants. This allows us to output
|
||||
constants with many decimal places in the generated C code,
|
||||
ensuring that we will take advantage of the full precision
|
||||
available on current and future machines.
|
||||
|
||||
Note that we have to write our own routine to compute roots of
|
||||
unity, since the Num package only supplies simple arithmetic. The
|
||||
arbitrary-precision operations in Num look like the normal
|
||||
operations except that they have an appended slash (e.g. +/ -/ */
|
||||
// etcetera). *)
|
||||
|
||||
open Num
|
||||
|
||||
type number = N of num
|
||||
|
||||
let makeNum n = N n
|
||||
|
||||
(* decimal digits of precision to maintain internally, and to print out: *)
|
||||
let precision = 50
|
||||
let print_precision = 45
|
||||
|
||||
let inveps = (Int 10) **/ (Int precision)
|
||||
let epsilon = (Int 1) // inveps
|
||||
|
||||
let pinveps = (Int 10) **/ (Int print_precision)
|
||||
let pepsilon = (Int 1) // pinveps
|
||||
|
||||
let round x = epsilon */ (round_num (x */ inveps))
|
||||
|
||||
let of_int n = N (Int n)
|
||||
let zero = of_int 0
|
||||
let one = of_int 1
|
||||
let two = of_int 2
|
||||
let mone = of_int (-1)
|
||||
|
||||
(* comparison predicate for real numbers *)
|
||||
let equal (N x) (N y) = (* use both relative and absolute error *)
|
||||
let absdiff = abs_num (x -/ y) in
|
||||
absdiff <=/ pepsilon ||
|
||||
absdiff <=/ pepsilon */ (abs_num x +/ abs_num y)
|
||||
|
||||
let is_zero = equal zero
|
||||
let is_one = equal one
|
||||
let is_mone = equal mone
|
||||
let is_two = equal two
|
||||
|
||||
|
||||
(* Note that, in the following computations, it is important to round
|
||||
to precision epsilon after each operation. Otherwise, since the
|
||||
Num package uses exact rational arithmetic, the number of digits
|
||||
quickly blows up. *)
|
||||
let mul (N a) (N b) = makeNum (round (a */ b))
|
||||
let div (N a) (N b) = makeNum (round (a // b))
|
||||
let add (N a) (N b) = makeNum (round (a +/ b))
|
||||
let sub (N a) (N b) = makeNum (round (a -/ b))
|
||||
|
||||
let negative (N a) = (a </ (Int 0))
|
||||
let negate (N a) = makeNum (minus_num a)
|
||||
|
||||
let greater a b = negative (sub b a)
|
||||
|
||||
let epsilonsq = epsilon */ epsilon
|
||||
let epsilonsq2 = (Int 100) */ epsilonsq
|
||||
|
||||
let sqr a = a */ a
|
||||
let almost_equal (N a) (N b) = (sqr (a -/ b)) <=/ epsilonsq2
|
||||
|
||||
(* find square root by Newton's method *)
|
||||
let sqrt a =
|
||||
let rec sqrt_iter guess =
|
||||
let newguess = div (add guess (div a guess)) two in
|
||||
if (almost_equal newguess guess) then newguess
|
||||
else sqrt_iter newguess
|
||||
in sqrt_iter (div a two)
|
||||
|
||||
let csub (xr, xi) (yr, yi) = (round (xr -/ yr), round (xi -/ yi))
|
||||
let cdiv (xr, xi) r = (round (xr // r), round (xi // r))
|
||||
let cmul (xr, xi) (yr, yi) = (round (xr */ yr -/ xi */ yi),
|
||||
round (xr */ yi +/ xi */ yr))
|
||||
let csqr (xr, xi) = (round (xr */ xr -/ xi */ xi), round ((Int 2) */ xr */ xi))
|
||||
let cabssq (xr, xi) = xr */ xr +/ xi */ xi
|
||||
let cconj (xr, xi) = (xr, minus_num xi)
|
||||
let cinv x = cdiv (cconj x) (cabssq x)
|
||||
|
||||
let almost_equal_cnum (xr, xi) (yr, yi) =
|
||||
(cabssq (xr -/ yr,xi -/ yi)) <=/ epsilonsq2
|
||||
|
||||
(* Put a complex number to an integer power by repeated squaring: *)
|
||||
let rec ipow_cnum x n =
|
||||
if (n == 0) then
|
||||
(Int 1, Int 0)
|
||||
else if (n < 0) then
|
||||
cinv (ipow_cnum x (- n))
|
||||
else if (n mod 2 == 0) then
|
||||
ipow_cnum (csqr x) (n / 2)
|
||||
else
|
||||
cmul x (ipow_cnum x (n - 1))
|
||||
|
||||
let twopi = 6.28318530717958647692528676655900576839433879875021164194989
|
||||
|
||||
(* Find the nth (complex) primitive root of unity by Newton's method: *)
|
||||
let primitive_root_of_unity n =
|
||||
let rec root_iter guess =
|
||||
let newguess = csub guess (cdiv (csub guess
|
||||
(ipow_cnum guess (1 - n)))
|
||||
(Int n)) in
|
||||
if (almost_equal_cnum guess newguess) then newguess
|
||||
else root_iter newguess
|
||||
in let float_to_num f = (Int (truncate (f *. 1.0e9))) // (Int 1000000000)
|
||||
in root_iter (float_to_num (cos (twopi /. (float n))),
|
||||
float_to_num (sin (twopi /. (float n))))
|
||||
|
||||
let cexp n i =
|
||||
if ((i mod n) == 0) then
|
||||
(one,zero)
|
||||
else
|
||||
let (n2,i2) = Util.lowest_terms n i
|
||||
in let (c,s) = ipow_cnum (primitive_root_of_unity n2) i2
|
||||
in (makeNum c, makeNum s)
|
||||
|
||||
let to_konst (N n) =
|
||||
let f = float_of_num n in
|
||||
let f' = if f < 0.0 then f *. (-1.0) else f in
|
||||
let f2 = if (f' >= 1.0) then (f' -. (float (truncate f'))) else f'
|
||||
in let q = string_of_int (truncate(f2 *. 1.0E9))
|
||||
in let r = "0000000000" ^ q
|
||||
in let l = String.length r
|
||||
in let prefix = if (f < 0.0) then "KN" else "KP" in
|
||||
if (f' >= 1.0) then
|
||||
(prefix ^ (string_of_int (truncate f')) ^ "_" ^
|
||||
(String.sub r (l - 9) 9))
|
||||
else
|
||||
(prefix ^ (String.sub r (l - 9) 9))
|
||||
|
||||
let to_string (N n) = approx_num_fix print_precision n
|
||||
|
||||
let to_float (N n) = float_of_num n
|
||||
|
||||
49
fftw-3.3.10/genfft/number.mli
Normal file
49
fftw-3.3.10/genfft/number.mli
Normal file
@@ -0,0 +1,49 @@
|
||||
(*
|
||||
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*)
|
||||
|
||||
type number
|
||||
|
||||
val equal : number -> number -> bool
|
||||
val of_int : int -> number
|
||||
val zero : number
|
||||
val one : number
|
||||
val two : number
|
||||
val mone : number
|
||||
val is_zero : number -> bool
|
||||
val is_one : number -> bool
|
||||
val is_mone : number -> bool
|
||||
val is_two : number -> bool
|
||||
val mul : number -> number -> number
|
||||
val div : number -> number -> number
|
||||
val add : number -> number -> number
|
||||
val sub : number -> number -> number
|
||||
val negative : number -> bool
|
||||
val greater : number -> number -> bool
|
||||
val negate : number -> number
|
||||
val sqrt : number -> number
|
||||
|
||||
(* cexp n i = (cos (2 * pi * i / n), sin (2 * pi * i / n)) *)
|
||||
val cexp : int -> int -> (number * number)
|
||||
|
||||
val to_konst : number -> string
|
||||
val to_string : number -> string
|
||||
val to_float : number -> float
|
||||
|
||||
144
fftw-3.3.10/genfft/oracle.ml
Normal file
144
fftw-3.3.10/genfft/oracle.ml
Normal file
@@ -0,0 +1,144 @@
|
||||
(*
|
||||
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*)
|
||||
|
||||
(*
|
||||
* the oracle decrees whether the sign of an expression should
|
||||
* be changed.
|
||||
*
|
||||
* Say the expression (A - B) appears somewhere. Elsewhere in the
|
||||
* expression dag the expression (B - A) may appear.
|
||||
* The oracle determines which of the two forms is canonical.
|
||||
*
|
||||
* Algorithm: evaluate the expression at a random input, and
|
||||
* keep the expression with the positive sign.
|
||||
*)
|
||||
|
||||
let make_memoizer hash equal =
|
||||
let table = ref Assoctable.empty
|
||||
in
|
||||
(fun f k ->
|
||||
match Assoctable.lookup hash equal k !table with
|
||||
Some value -> value
|
||||
| None ->
|
||||
let value = f k in
|
||||
begin
|
||||
table := Assoctable.insert hash k value !table;
|
||||
value
|
||||
end)
|
||||
|
||||
let almost_equal x y =
|
||||
let epsilon = 1.0E-8 in
|
||||
(abs_float (x -. y) < epsilon) ||
|
||||
(abs_float (x -. y) < epsilon *. (abs_float x +. abs_float y))
|
||||
|
||||
let absid = make_memoizer
|
||||
(fun x -> Expr.hash_float (abs_float x))
|
||||
(fun a b -> almost_equal a b || almost_equal (-. a) b)
|
||||
(fun x -> x)
|
||||
|
||||
let make_random_oracle () = make_memoizer
|
||||
Variable.hash
|
||||
Variable.same
|
||||
(fun _ -> (float (Random.bits())) /. 1073741824.0)
|
||||
|
||||
let the_random_oracle = make_random_oracle ()
|
||||
|
||||
let sum_list l = List.fold_right (+.) l 0.0
|
||||
|
||||
let eval_aux random_oracle =
|
||||
let memoizing = make_memoizer Expr.hash (==) in
|
||||
let rec eval x =
|
||||
memoizing
|
||||
(function
|
||||
| Expr.Num x -> Number.to_float x
|
||||
| Expr.NaN x -> Expr.transcendent_to_float x
|
||||
| Expr.Load v -> random_oracle v
|
||||
| Expr.Store (v, x) -> eval x
|
||||
| Expr.Plus l -> sum_list (List.map eval l)
|
||||
| Expr.Times (a, b) -> (eval a) *. (eval b)
|
||||
| Expr.CTimes (a, b) ->
|
||||
1.098612288668109691395245236 +.
|
||||
1.609437912434100374600759333 *. (eval a) *. (eval b)
|
||||
| Expr.CTimesJ (a, b) ->
|
||||
0.9102392266268373936142401657 +.
|
||||
0.6213349345596118107071993881 *. (eval a) *. (eval b)
|
||||
| Expr.Uminus x -> -. (eval x))
|
||||
x
|
||||
in eval
|
||||
|
||||
let eval = eval_aux the_random_oracle
|
||||
|
||||
let should_flip_sign node =
|
||||
let v = eval node in
|
||||
let v' = absid v in
|
||||
not (almost_equal v v')
|
||||
|
||||
(*
|
||||
* determine with high probability if two expressions are equal.
|
||||
*
|
||||
* The test is randomized: if the two expressions have the
|
||||
* same value for NTESTS random inputs, then they are proclaimed
|
||||
* equal. (Note that two distinct linear functions L1(x0, x1, ..., xn)
|
||||
* and L2(x0, x1, ..., xn) have the same value with probability
|
||||
* 0 for random x's, and thus this test is way more paranoid than
|
||||
* necessary.)
|
||||
*)
|
||||
let likely_equal a b =
|
||||
let tolerance = 1.0e-8
|
||||
and ntests = 20
|
||||
in
|
||||
let rec loop n =
|
||||
if n = 0 then
|
||||
true
|
||||
else
|
||||
let r = make_random_oracle () in
|
||||
let va = eval_aux r a
|
||||
and vb = eval_aux r b
|
||||
in
|
||||
if (abs_float (va -. vb)) >
|
||||
tolerance *. (abs_float va +. abs_float vb +. 0.0001)
|
||||
then
|
||||
false
|
||||
else
|
||||
loop (n - 1)
|
||||
in
|
||||
match (a, b) with
|
||||
|
||||
(*
|
||||
* Because of the way eval is constructed, we have
|
||||
* eval (Store (v, x)) == eval x
|
||||
* However, we never consider the two expressions equal
|
||||
*)
|
||||
| (Expr.Store _, _) -> false
|
||||
| (_, Expr.Store _) -> false
|
||||
|
||||
(*
|
||||
* Expressions of the form ``Uminus (Store _)''
|
||||
* are artifacts of algsimp
|
||||
*)
|
||||
| ((Expr.Uminus (Expr.Store _)), _) -> false
|
||||
| (_, Expr.Uminus (Expr.Store _)) -> false
|
||||
|
||||
| _ -> loop ntests
|
||||
|
||||
let hash x =
|
||||
let f = eval x in
|
||||
truncate (f *. 65536.0)
|
||||
24
fftw-3.3.10/genfft/oracle.mli
Normal file
24
fftw-3.3.10/genfft/oracle.mli
Normal file
@@ -0,0 +1,24 @@
|
||||
(*
|
||||
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*)
|
||||
|
||||
val should_flip_sign : Expr.expr -> bool
|
||||
val likely_equal : Expr.expr -> Expr.expr -> bool
|
||||
val hash : Expr.expr -> int
|
||||
236
fftw-3.3.10/genfft/schedule.ml
Normal file
236
fftw-3.3.10/genfft/schedule.ml
Normal file
@@ -0,0 +1,236 @@
|
||||
(*
|
||||
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*)
|
||||
|
||||
(* This file contains the instruction scheduler, which finds an
|
||||
efficient ordering for a given list of instructions.
|
||||
|
||||
The scheduler analyzes the DAG (directed acyclic graph) formed by
|
||||
the instruction dependencies, and recursively partitions it. The
|
||||
resulting schedule data structure expresses a "good" ordering
|
||||
and structure for the computation.
|
||||
|
||||
The scheduler makes use of utilties in Dag and other packages to
|
||||
manipulate the Dag and the instruction list. *)
|
||||
|
||||
open Dag
|
||||
(*************************************************
|
||||
* Dag scheduler
|
||||
*************************************************)
|
||||
let to_assignment node = (Expr.Assign (node.assigned, node.expression))
|
||||
let makedag l = Dag.makedag
|
||||
(List.map (function Expr.Assign (v, x) -> (v, x)) l)
|
||||
|
||||
let return x = x
|
||||
let has_color c n = (n.color = c)
|
||||
let set_color c n = (n.color <- c)
|
||||
let has_either_color c1 c2 n = (n.color = c1 || n.color = c2)
|
||||
|
||||
let infinity = 100000
|
||||
|
||||
let cc dag inputs =
|
||||
begin
|
||||
Dag.for_all dag (fun node ->
|
||||
node.label <- infinity);
|
||||
|
||||
(match inputs with
|
||||
a :: _ -> bfs dag a 0
|
||||
| _ -> failwith "connected");
|
||||
|
||||
return
|
||||
((List.map to_assignment (List.filter (fun n -> n.label < infinity)
|
||||
(Dag.to_list dag))),
|
||||
(List.map to_assignment (List.filter (fun n -> n.label == infinity)
|
||||
(Dag.to_list dag))))
|
||||
end
|
||||
|
||||
let rec connected_components alist =
|
||||
let dag = makedag alist in
|
||||
let inputs =
|
||||
List.filter (fun node -> Util.null node.predecessors)
|
||||
(Dag.to_list dag) in
|
||||
match cc dag inputs with
|
||||
(a, []) -> [a]
|
||||
| (a, b) -> a :: connected_components b
|
||||
|
||||
let single_load node =
|
||||
match (node.input_variables, node.predecessors) with
|
||||
([x], []) ->
|
||||
Variable.is_constant x ||
|
||||
(!Magic.locations_are_special && Variable.is_locative x)
|
||||
| _ -> false
|
||||
|
||||
let loads_locative node =
|
||||
match (node.input_variables, node.predecessors) with
|
||||
| ([x], []) -> Variable.is_locative x
|
||||
| _ -> false
|
||||
|
||||
let partition alist =
|
||||
let dag = makedag alist in
|
||||
let dag' = Dag.to_list dag in
|
||||
let inputs =
|
||||
List.filter (fun node -> Util.null node.predecessors) dag'
|
||||
and outputs =
|
||||
List.filter (fun node -> Util.null node.successors) dag'
|
||||
and special_inputs = List.filter single_load dag' in
|
||||
begin
|
||||
|
||||
let c = match !Magic.schedule_type with
|
||||
| 1 -> RED; (* all nodes in the input partition *)
|
||||
| -1 -> BLUE; (* all nodes in the output partition *)
|
||||
| _ -> BLACK; (* node color determined by bisection algorithm *)
|
||||
in Dag.for_all dag (fun node -> node.color <- c);
|
||||
|
||||
Util.for_list inputs (set_color RED);
|
||||
|
||||
(*
|
||||
The special inputs are those input nodes that load a single
|
||||
location or twiddle factor. Special inputs can end up either
|
||||
in the blue or in the red part. These inputs are special
|
||||
because they inherit a color from their neighbors: If a red
|
||||
node needs a special input, the special input becomes red, but
|
||||
if all successors of a special input are blue, the special
|
||||
input becomes blue. Outputs are always blue, whether they be
|
||||
special or not.
|
||||
|
||||
Because of the processing of special inputs, however, the final
|
||||
partition might end up being composed only of blue nodes (which
|
||||
is incorrect). In this case we manually reset all inputs
|
||||
(whether special or not) to be red.
|
||||
*)
|
||||
|
||||
Util.for_list special_inputs (set_color YELLOW);
|
||||
|
||||
Util.for_list outputs (set_color BLUE);
|
||||
|
||||
let rec loopi donep =
|
||||
match (List.filter
|
||||
(fun node -> (has_color BLACK node) &&
|
||||
List.for_all (has_either_color RED YELLOW) node.predecessors)
|
||||
dag') with
|
||||
[] -> if (donep) then () else loopo true
|
||||
| i ->
|
||||
begin
|
||||
Util.for_list i (fun node ->
|
||||
begin
|
||||
set_color RED node;
|
||||
Util.for_list node.predecessors (set_color RED);
|
||||
end);
|
||||
loopo false;
|
||||
end
|
||||
|
||||
and loopo donep =
|
||||
match (List.filter
|
||||
(fun node -> (has_either_color BLACK YELLOW node) &&
|
||||
List.for_all (has_color BLUE) node.successors)
|
||||
dag') with
|
||||
[] -> if (donep) then () else loopi true
|
||||
| o ->
|
||||
begin
|
||||
Util.for_list o (set_color BLUE);
|
||||
loopi false;
|
||||
end
|
||||
|
||||
in loopi false;
|
||||
|
||||
(* fix the partition if it is incorrect *)
|
||||
if not (List.exists (has_color RED) dag') then
|
||||
Util.for_list inputs (set_color RED);
|
||||
|
||||
return
|
||||
((List.map to_assignment (List.filter (has_color RED) dag')),
|
||||
(List.map to_assignment (List.filter (has_color BLUE) dag')))
|
||||
end
|
||||
|
||||
type schedule =
|
||||
Done
|
||||
| Instr of Expr.assignment
|
||||
| Seq of (schedule * schedule)
|
||||
| Par of schedule list
|
||||
|
||||
|
||||
|
||||
(* produce a sequential schedule determined by the user *)
|
||||
let rec sequentially = function
|
||||
[] -> Done
|
||||
| a :: b -> Seq (Instr a, sequentially b)
|
||||
|
||||
let schedule =
|
||||
let rec schedule_alist = function
|
||||
| [] -> Done
|
||||
| [a] -> Instr a
|
||||
| alist -> match connected_components alist with
|
||||
| ([a]) -> schedule_connected a
|
||||
| l -> Par (List.map schedule_alist l)
|
||||
|
||||
and schedule_connected alist =
|
||||
match partition alist with
|
||||
| (a, b) -> Seq (schedule_alist a, schedule_alist b)
|
||||
|
||||
in fun x ->
|
||||
let () = Util.info "begin schedule" in
|
||||
let res = schedule_alist x in
|
||||
let () = Util.info "end schedule" in
|
||||
res
|
||||
|
||||
|
||||
(* partition a dag into two parts:
|
||||
|
||||
1) the set of loads from locatives and their successors,
|
||||
2) all other nodes
|
||||
|
||||
This step separates the ``body'' of the dag, which computes the
|
||||
actual fft, from the ``precomputations'' part, which computes e.g.
|
||||
twiddle factors.
|
||||
*)
|
||||
let partition_precomputations alist =
|
||||
let dag = makedag alist in
|
||||
let dag' = Dag.to_list dag in
|
||||
let loads = List.filter loads_locative dag' in
|
||||
begin
|
||||
|
||||
Dag.for_all dag (set_color BLUE);
|
||||
Util.for_list loads (set_color RED);
|
||||
|
||||
let rec loop () =
|
||||
match (List.filter
|
||||
(fun node -> (has_color RED node) &&
|
||||
List.exists (has_color BLUE) node.successors)
|
||||
dag') with
|
||||
[] -> ()
|
||||
| i ->
|
||||
begin
|
||||
Util.for_list i
|
||||
(fun node ->
|
||||
Util.for_list node.successors (set_color RED));
|
||||
loop ()
|
||||
end
|
||||
|
||||
in loop ();
|
||||
|
||||
return
|
||||
((List.map to_assignment (List.filter (has_color BLUE) dag')),
|
||||
(List.map to_assignment (List.filter (has_color RED) dag')))
|
||||
end
|
||||
|
||||
let isolate_precomputations_and_schedule alist =
|
||||
let (a, b) = partition_precomputations alist in
|
||||
Seq (schedule a, schedule b)
|
||||
|
||||
30
fftw-3.3.10/genfft/schedule.mli
Normal file
30
fftw-3.3.10/genfft/schedule.mli
Normal file
@@ -0,0 +1,30 @@
|
||||
(*
|
||||
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*)
|
||||
|
||||
type schedule =
|
||||
| Done
|
||||
| Instr of Expr.assignment
|
||||
| Seq of (schedule * schedule)
|
||||
| Par of schedule list
|
||||
|
||||
val schedule : Expr.assignment list -> schedule
|
||||
val sequentially : Expr.assignment list -> schedule
|
||||
val isolate_precomputations_and_schedule : Expr.assignment list -> schedule
|
||||
215
fftw-3.3.10/genfft/simd.ml
Normal file
215
fftw-3.3.10/genfft/simd.ml
Normal file
@@ -0,0 +1,215 @@
|
||||
(*
|
||||
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*)
|
||||
|
||||
open Expr
|
||||
open List
|
||||
open Printf
|
||||
open Variable
|
||||
open Annotate
|
||||
open Simdmagic
|
||||
open C
|
||||
|
||||
let realtype = "V"
|
||||
let realtypep = realtype ^ " *"
|
||||
let constrealtype = "const " ^ realtype
|
||||
let constrealtypep = constrealtype ^ " *"
|
||||
let alignment_mod = 2
|
||||
|
||||
(*
|
||||
* SIMD C AST unparser
|
||||
*)
|
||||
let foldr_string_concat l = fold_right (^) l ""
|
||||
|
||||
let rec unparse_by_twiddle nam tw src =
|
||||
sprintf "%s(&(%s),%s)" nam (Variable.unparse tw) (unparse_expr src)
|
||||
|
||||
and unparse_store dst = function
|
||||
| Times (NaN MULTI_A, x) ->
|
||||
sprintf "STM%d(&(%s),%s,%s,&(%s));\n"
|
||||
!Simdmagic.store_multiple
|
||||
(Variable.unparse dst) (unparse_expr x)
|
||||
(Variable.vstride_of_locative dst)
|
||||
(Variable.unparse_for_alignment alignment_mod dst)
|
||||
| Times (NaN MULTI_B, Plus stuff) ->
|
||||
sprintf "STN%d(&(%s)%s,%s);\n"
|
||||
!Simdmagic.store_multiple
|
||||
(Variable.unparse dst)
|
||||
(List.fold_right (fun x a -> "," ^ (unparse_expr x) ^ a) stuff "")
|
||||
(Variable.vstride_of_locative dst)
|
||||
| src_expr ->
|
||||
sprintf "ST(&(%s),%s,%s,&(%s));\n"
|
||||
(Variable.unparse dst) (unparse_expr src_expr)
|
||||
(Variable.vstride_of_locative dst)
|
||||
(Variable.unparse_for_alignment alignment_mod dst)
|
||||
|
||||
and unparse_expr =
|
||||
let rec unparse_plus = function
|
||||
| [a] -> unparse_expr a
|
||||
|
||||
| (Uminus (Times (NaN I, b))) :: c :: d -> op2 "VFNMSI" [b] (c :: d)
|
||||
| c :: (Uminus (Times (NaN I, b))) :: d -> op2 "VFNMSI" [b] (c :: d)
|
||||
| (Uminus (Times (NaN CONJ, b))) :: c :: d -> op2 "VFNMSCONJ" [b] (c :: d)
|
||||
| c :: (Uminus (Times (NaN CONJ, b))) :: d -> op2 "VFNMSCONJ" [b] (c :: d)
|
||||
| (Times (NaN I, b)) :: c :: d -> op2 "VFMAI" [b] (c :: d)
|
||||
| c :: (Times (NaN I, b)) :: d -> op2 "VFMAI" [b] (c :: d)
|
||||
| (Times (NaN CONJ, b)) :: (Uminus c) :: d -> op2 "VFMSCONJ" [b] (c :: d)
|
||||
| (Uminus c) :: (Times (NaN CONJ, b)) :: d -> op2 "VFMSCONJ" [b] (c :: d)
|
||||
| (Times (NaN CONJ, b)) :: c :: d -> op2 "VFMACONJ" [b] (c :: d)
|
||||
| c :: (Times (NaN CONJ, b)) :: d -> op2 "VFMACONJ" [b] (c :: d)
|
||||
| (Times (NaN _, b)) :: (Uminus c) :: d -> failwith "VFMS NaN"
|
||||
| (Uminus c) :: (Times (NaN _, b)) :: d -> failwith "VFMS NaN"
|
||||
|
||||
| (Uminus (Times (a, b))) :: c :: d -> op3 "VFNMS" a b (c :: d)
|
||||
| c :: (Uminus (Times (a, b))) :: d -> op3 "VFNMS" a b (c :: d)
|
||||
| (Times (a, b)) :: (Uminus c) :: d -> op3 "VFMS" a b (c :: negate d)
|
||||
| (Uminus c) :: (Times (a, b)) :: d -> op3 "VFMS" a b (c :: negate d)
|
||||
| (Times (a, b)) :: c :: d -> op3 "VFMA" a b (c :: d)
|
||||
| c :: (Times (a, b)) :: d -> op3 "VFMA" a b (c :: d)
|
||||
|
||||
| (Uminus a :: b) -> op2 "VSUB" b [a]
|
||||
| (b :: Uminus a :: c) -> op2 "VSUB" (b :: c) [a]
|
||||
| (a :: b) -> op2 "VADD" [a] b
|
||||
| [] -> failwith "unparse_plus"
|
||||
and op3 nam a b c =
|
||||
nam ^ "(" ^ (unparse_expr a) ^ ", " ^ (unparse_expr b) ^ ", " ^
|
||||
(unparse_plus c) ^ ")"
|
||||
and op2 nam a b =
|
||||
nam ^ "(" ^ (unparse_plus a) ^ ", " ^ (unparse_plus b) ^ ")"
|
||||
and op1 nam a =
|
||||
nam ^ "(" ^ (unparse_expr a) ^ ")"
|
||||
and negate = function
|
||||
| [] -> []
|
||||
| (Uminus x) :: y -> x :: negate y
|
||||
| x :: y -> (Uminus x) :: negate y
|
||||
|
||||
in function
|
||||
| CTimes(Load tw, src)
|
||||
when Variable.is_constant tw && !Magic.generate_bytw ->
|
||||
unparse_by_twiddle "BYTW" tw src
|
||||
| CTimesJ(Load tw, src)
|
||||
when Variable.is_constant tw && !Magic.generate_bytw ->
|
||||
unparse_by_twiddle "BYTWJ" tw src
|
||||
| Load v when is_locative(v) ->
|
||||
sprintf "LD(&(%s), %s, &(%s))" (Variable.unparse v)
|
||||
(Variable.vstride_of_locative v)
|
||||
(Variable.unparse_for_alignment alignment_mod v)
|
||||
| Load v when is_constant(v) -> sprintf "LDW(&(%s))" (Variable.unparse v)
|
||||
| Load v -> Variable.unparse v
|
||||
| Num n -> sprintf "LDK(%s)" (Number.to_konst n)
|
||||
| NaN n -> failwith "NaN in unparse_expr"
|
||||
| Plus [] -> "0.0 /* bug */"
|
||||
| Plus [a] -> " /* bug */ " ^ (unparse_expr a)
|
||||
| Plus a -> unparse_plus a
|
||||
| Times(NaN I,b) -> op1 "VBYI" b
|
||||
| Times(NaN CONJ,b) -> op1 "VCONJ" b
|
||||
| Times(a,b) ->
|
||||
sprintf "VMUL(%s, %s)" (unparse_expr a) (unparse_expr b)
|
||||
| CTimes(a,Times(NaN I, b)) ->
|
||||
sprintf "VZMULI(%s, %s)" (unparse_expr a) (unparse_expr b)
|
||||
| CTimes(a,b) ->
|
||||
sprintf "VZMUL(%s, %s)" (unparse_expr a) (unparse_expr b)
|
||||
| CTimesJ(a,Times(NaN I, b)) ->
|
||||
sprintf "VZMULIJ(%s, %s)" (unparse_expr a) (unparse_expr b)
|
||||
| CTimesJ(a,b) ->
|
||||
sprintf "VZMULJ(%s, %s)" (unparse_expr a) (unparse_expr b)
|
||||
| Uminus a when !Magic.vneg -> op1 "VNEG" a
|
||||
| Uminus a -> failwith "SIMD Uminus"
|
||||
| _ -> failwith "unparse_expr"
|
||||
|
||||
and unparse_decl x = C.unparse_decl x
|
||||
|
||||
and unparse_ast ast =
|
||||
let rec unparse_assignment = function
|
||||
| Assign (v, x) when Variable.is_locative v ->
|
||||
unparse_store v x
|
||||
| Assign (v, x) ->
|
||||
(Variable.unparse v) ^ " = " ^ (unparse_expr x) ^ ";\n"
|
||||
|
||||
and unparse_annotated force_bracket =
|
||||
let rec unparse_code = function
|
||||
| ADone -> ""
|
||||
| AInstr i -> unparse_assignment i
|
||||
| ASeq (a, b) ->
|
||||
(unparse_annotated false a) ^ (unparse_annotated false b)
|
||||
and declare_variables l =
|
||||
let rec uvar = function
|
||||
[] -> failwith "uvar"
|
||||
| [v] -> (Variable.unparse v) ^ ";\n"
|
||||
| a :: b -> (Variable.unparse a) ^ ", " ^ (uvar b)
|
||||
in let rec vvar l =
|
||||
let s = if !Magic.compact then 15 else 1 in
|
||||
if (List.length l <= s) then
|
||||
match l with
|
||||
[] -> ""
|
||||
| _ -> realtype ^ " " ^ (uvar l)
|
||||
else
|
||||
(vvar (Util.take s l)) ^ (vvar (Util.drop s l))
|
||||
in vvar (List.filter Variable.is_temporary l)
|
||||
in function
|
||||
Annotate (_, _, decl, _, code) ->
|
||||
if (not force_bracket) && (Util.null decl) then
|
||||
unparse_code code
|
||||
else "{\n" ^
|
||||
(declare_variables decl) ^
|
||||
(unparse_code code) ^
|
||||
"}\n"
|
||||
|
||||
in match ast with
|
||||
| Asch a -> (unparse_annotated true a)
|
||||
| Return x -> "return " ^ unparse_ast x ^ ";"
|
||||
| Simd_leavefun -> "VLEAVE();"
|
||||
| For (a, b, c, d) ->
|
||||
"for (" ^
|
||||
unparse_ast a ^ "; " ^ unparse_ast b ^ "; " ^ unparse_ast c
|
||||
^ ")" ^ unparse_ast d
|
||||
| If (a, d) ->
|
||||
"if (" ^
|
||||
unparse_ast a
|
||||
^ ")" ^ unparse_ast d
|
||||
| Block (d, s) ->
|
||||
if (s == []) then ""
|
||||
else
|
||||
"{\n" ^
|
||||
foldr_string_concat (map unparse_decl d) ^
|
||||
foldr_string_concat (map unparse_ast s) ^
|
||||
"}\n"
|
||||
| x -> C.unparse_ast x
|
||||
|
||||
and unparse_function = function
|
||||
Fcn (typ, name, args, body) ->
|
||||
let rec unparse_args = function
|
||||
[Decl (a, b)] -> a ^ " " ^ b
|
||||
| (Decl (a, b)) :: s -> a ^ " " ^ b ^ ", "
|
||||
^ unparse_args s
|
||||
| [] -> ""
|
||||
| _ -> failwith "unparse_function"
|
||||
in
|
||||
(typ ^ " " ^ name ^ "(" ^ unparse_args args ^ ")\n" ^
|
||||
unparse_ast body)
|
||||
|
||||
let extract_constants f =
|
||||
let constlist = flatten (map expr_to_constants (C.ast_to_expr_list f))
|
||||
in map
|
||||
(fun n ->
|
||||
Tdecl
|
||||
("DVK(" ^ (Number.to_konst n) ^ ", " ^ (Number.to_string n) ^
|
||||
");\n"))
|
||||
(unique_constants constlist)
|
||||
28
fftw-3.3.10/genfft/simd.mli
Normal file
28
fftw-3.3.10/genfft/simd.mli
Normal file
@@ -0,0 +1,28 @@
|
||||
(*
|
||||
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*)
|
||||
|
||||
val unparse_function : C.c_fcn -> string
|
||||
val extract_constants : C.c_ast -> C.c_decl list
|
||||
val realtype : string
|
||||
val realtypep : string
|
||||
val constrealtype : string
|
||||
val constrealtypep : string
|
||||
|
||||
31
fftw-3.3.10/genfft/simdmagic.ml
Normal file
31
fftw-3.3.10/genfft/simdmagic.ml
Normal file
@@ -0,0 +1,31 @@
|
||||
(*
|
||||
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*)
|
||||
|
||||
(* SIMD magic parameters *)
|
||||
let simd_mode = ref false
|
||||
let store_multiple = ref 1
|
||||
|
||||
open Magic
|
||||
|
||||
let speclist = [
|
||||
"-simd", set_bool simd_mode, undocumented;
|
||||
"-store-multiple", set_int store_multiple, undocumented;
|
||||
]
|
||||
288
fftw-3.3.10/genfft/to_alist.ml
Normal file
288
fftw-3.3.10/genfft/to_alist.ml
Normal file
@@ -0,0 +1,288 @@
|
||||
(*
|
||||
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*)
|
||||
|
||||
(*************************************************************
|
||||
* Conversion of the dag to an assignment list
|
||||
*************************************************************)
|
||||
(*
|
||||
* This function is messy. The main problem is that we want to
|
||||
* inline dag nodes conditionally, depending on how many times they
|
||||
* are used. The Right Thing to do would be to modify the
|
||||
* state monad to propagate some of the state backwards, so that
|
||||
* we know whether a given node will be used again in the future.
|
||||
* This modification is trivial in a lazy language, but it is
|
||||
* messy in a strict language like ML.
|
||||
*
|
||||
* In this implementation, we just do the obvious thing, i.e., visit
|
||||
* the dag twice, the first to count the node usages, and the second to
|
||||
* produce the output.
|
||||
*)
|
||||
|
||||
open Monads.StateMonad
|
||||
open Monads.MemoMonad
|
||||
open Expr
|
||||
|
||||
let fresh = Variable.make_temporary
|
||||
let node_insert x = Assoctable.insert Expr.hash x
|
||||
let node_lookup x = Assoctable.lookup Expr.hash (==) x
|
||||
let empty = Assoctable.empty
|
||||
|
||||
let fetchAl =
|
||||
fetchState >>= (fun (al, _, _) -> returnM al)
|
||||
|
||||
let storeAl al =
|
||||
fetchState >>= (fun (_, visited, visited') ->
|
||||
storeState (al, visited, visited'))
|
||||
|
||||
let fetchVisited = fetchState >>= (fun (_, v, _) -> returnM v)
|
||||
|
||||
let storeVisited visited =
|
||||
fetchState >>= (fun (al, _, visited') ->
|
||||
storeState (al, visited, visited'))
|
||||
|
||||
let fetchVisited' = fetchState >>= (fun (_, _, v') -> returnM v')
|
||||
let storeVisited' visited' =
|
||||
fetchState >>= (fun (al, visited, _) ->
|
||||
storeState (al, visited, visited'))
|
||||
let lookupVisitedM' key =
|
||||
fetchVisited' >>= fun table ->
|
||||
returnM (node_lookup key table)
|
||||
let insertVisitedM' key value =
|
||||
fetchVisited' >>= fun table ->
|
||||
storeVisited' (node_insert key value table)
|
||||
|
||||
let counting f x =
|
||||
fetchVisited >>= (fun v ->
|
||||
match node_lookup x v with
|
||||
Some count ->
|
||||
let incr_cnt =
|
||||
fetchVisited >>= (fun v' ->
|
||||
storeVisited (node_insert x (count + 1) v'))
|
||||
in
|
||||
begin
|
||||
match x with
|
||||
(* Uminus is always inlined. Visit child *)
|
||||
Uminus y -> f y >> incr_cnt
|
||||
| _ -> incr_cnt
|
||||
end
|
||||
| None ->
|
||||
f x >> fetchVisited >>= (fun v' ->
|
||||
storeVisited (node_insert x 1 v')))
|
||||
|
||||
let with_varM v x =
|
||||
fetchAl >>= (fun al -> storeAl ((v, x) :: al)) >> returnM (Load v)
|
||||
|
||||
let inlineM = returnM
|
||||
|
||||
let with_tempM x = match x with
|
||||
| Load v when Variable.is_temporary v -> inlineM x (* avoid trivial moves *)
|
||||
| _ -> with_varM (fresh ()) x
|
||||
|
||||
(* declare a temporary only if node is used more than once *)
|
||||
let with_temp_maybeM node x =
|
||||
fetchVisited >>= (fun v ->
|
||||
match node_lookup node v with
|
||||
Some count ->
|
||||
if (count = 1 && !Magic.inline_single) then
|
||||
inlineM x
|
||||
else
|
||||
with_tempM x
|
||||
| None ->
|
||||
failwith "with_temp_maybeM")
|
||||
type fma =
|
||||
NO_FMA
|
||||
| FMA of expr * expr * expr (* FMA (a, b, c) => a + b * c *)
|
||||
| FMS of expr * expr * expr (* FMS (a, b, c) => -a + b * c *)
|
||||
| FNMS of expr * expr * expr (* FNMS (a, b, c) => a - b * c *)
|
||||
|
||||
let good_for_fma (a, b) =
|
||||
let good = function
|
||||
| NaN I -> true
|
||||
| NaN CONJ -> true
|
||||
| NaN _ -> false
|
||||
| Times(NaN _, _) -> false
|
||||
| Times(_, NaN _) -> false
|
||||
| _ -> true
|
||||
in good a && good b
|
||||
|
||||
let build_fma l =
|
||||
if (not !Magic.enable_fma) then NO_FMA
|
||||
else match l with
|
||||
| [a; Uminus (Times (b, c))] when good_for_fma (b, c) -> FNMS (a, b, c)
|
||||
| [Uminus (Times (b, c)); a] when good_for_fma (b, c) -> FNMS (a, b, c)
|
||||
| [Uminus a; Times (b, c)] when good_for_fma (b, c) -> FMS (a, b, c)
|
||||
| [Times (b, c); Uminus a] when good_for_fma (b, c) -> FMS (a, b, c)
|
||||
| [a; Times (b, c)] when good_for_fma (b, c) -> FMA (a, b, c)
|
||||
| [Times (b, c); a] when good_for_fma (b, c) -> FMA (a, b, c)
|
||||
| _ -> NO_FMA
|
||||
|
||||
let children_fma l = match build_fma l with
|
||||
| FMA (a, b, c) -> Some (a, b, c)
|
||||
| FMS (a, b, c) -> Some (a, b, c)
|
||||
| FNMS (a, b, c) -> Some (a, b, c)
|
||||
| NO_FMA -> None
|
||||
|
||||
|
||||
let rec visitM x =
|
||||
counting (function
|
||||
| Load v -> returnM ()
|
||||
| Num a -> returnM ()
|
||||
| NaN a -> returnM ()
|
||||
| Store (v, x) -> visitM x
|
||||
| Plus a -> (match children_fma a with
|
||||
None -> mapM visitM a >> returnM ()
|
||||
| Some (a, b, c) ->
|
||||
(* visit fma's arguments twice to make sure they are not inlined *)
|
||||
visitM a >> visitM a >>
|
||||
visitM b >> visitM b >>
|
||||
visitM c >> visitM c)
|
||||
| Times (a, b) -> visitM a >> visitM b
|
||||
| CTimes (a, b) -> visitM a >> visitM b
|
||||
| CTimesJ (a, b) -> visitM a >> visitM b
|
||||
| Uminus a -> visitM a)
|
||||
x
|
||||
|
||||
let visit_rootsM = mapM visitM
|
||||
|
||||
|
||||
let rec expr_of_nodeM x =
|
||||
memoizing lookupVisitedM' insertVisitedM'
|
||||
(function x -> match x with
|
||||
| Load v ->
|
||||
if (Variable.is_temporary v) then
|
||||
inlineM (Load v)
|
||||
else if (Variable.is_locative v && !Magic.inline_loads) then
|
||||
inlineM (Load v)
|
||||
else if (Variable.is_constant v && !Magic.inline_loads_constants) then
|
||||
inlineM (Load v)
|
||||
else
|
||||
with_tempM (Load v)
|
||||
| Num a ->
|
||||
if !Magic.inline_constants then
|
||||
inlineM (Num a)
|
||||
else
|
||||
with_temp_maybeM x (Num a)
|
||||
| NaN a -> inlineM (NaN a)
|
||||
| Store (v, x) ->
|
||||
expr_of_nodeM x >>=
|
||||
(if !Magic.trivial_stores then with_tempM else inlineM) >>=
|
||||
with_varM v
|
||||
|
||||
| Plus a ->
|
||||
begin
|
||||
match build_fma a with
|
||||
FMA (a, b, c) ->
|
||||
expr_of_nodeM a >>= fun a' ->
|
||||
expr_of_nodeM b >>= fun b' ->
|
||||
expr_of_nodeM c >>= fun c' ->
|
||||
with_temp_maybeM x (Plus [a'; Times (b', c')])
|
||||
| FMS (a, b, c) ->
|
||||
expr_of_nodeM a >>= fun a' ->
|
||||
expr_of_nodeM b >>= fun b' ->
|
||||
expr_of_nodeM c >>= fun c' ->
|
||||
with_temp_maybeM x
|
||||
(Plus [Times (b', c'); Uminus a'])
|
||||
| FNMS (a, b, c) ->
|
||||
expr_of_nodeM a >>= fun a' ->
|
||||
expr_of_nodeM b >>= fun b' ->
|
||||
expr_of_nodeM c >>= fun c' ->
|
||||
with_temp_maybeM x
|
||||
(Plus [a'; Uminus (Times (b', c'))])
|
||||
| NO_FMA ->
|
||||
mapM expr_of_nodeM a >>= fun a' ->
|
||||
with_temp_maybeM x (Plus a')
|
||||
end
|
||||
| CTimes (Load _ as a, b) when !Magic.generate_bytw ->
|
||||
expr_of_nodeM b >>= fun b' ->
|
||||
with_tempM (CTimes (a, b'))
|
||||
| CTimes (a, b) ->
|
||||
expr_of_nodeM a >>= fun a' ->
|
||||
expr_of_nodeM b >>= fun b' ->
|
||||
with_tempM (CTimes (a', b'))
|
||||
| CTimesJ (Load _ as a, b) when !Magic.generate_bytw ->
|
||||
expr_of_nodeM b >>= fun b' ->
|
||||
with_tempM (CTimesJ (a, b'))
|
||||
| CTimesJ (a, b) ->
|
||||
expr_of_nodeM a >>= fun a' ->
|
||||
expr_of_nodeM b >>= fun b' ->
|
||||
with_tempM (CTimesJ (a', b'))
|
||||
| Times (a, b) ->
|
||||
expr_of_nodeM a >>= fun a' ->
|
||||
expr_of_nodeM b >>= fun b' ->
|
||||
begin
|
||||
match a' with
|
||||
Num a'' when !Magic.strength_reduce_mul && Number.is_two a'' ->
|
||||
(inlineM b' >>= fun b'' ->
|
||||
with_temp_maybeM x (Plus [b''; b'']))
|
||||
| _ -> with_temp_maybeM x (Times (a', b'))
|
||||
end
|
||||
| Uminus a ->
|
||||
expr_of_nodeM a >>= fun a' ->
|
||||
inlineM (Uminus a'))
|
||||
x
|
||||
|
||||
let expr_of_rootsM = mapM expr_of_nodeM
|
||||
|
||||
let peek_alistM roots =
|
||||
visit_rootsM roots >> expr_of_rootsM roots >> fetchAl
|
||||
|
||||
let wrap_assign (a, b) = Expr.Assign (a, b)
|
||||
|
||||
let to_assignments dag =
|
||||
let () = Util.info "begin to_alist" in
|
||||
let al = List.rev (runM ([], empty, empty) peek_alistM dag) in
|
||||
let res = List.map wrap_assign al in
|
||||
let () = Util.info "end to_alist" in
|
||||
res
|
||||
|
||||
|
||||
(* dump alist in `dot' format *)
|
||||
let dump print alist =
|
||||
let vs v = "\"" ^ (Variable.unparse v) ^ "\"" in
|
||||
begin
|
||||
print "digraph G {\n";
|
||||
print "\tsize=\"6,6\";\n";
|
||||
|
||||
(* all input nodes have the same rank *)
|
||||
print "{ rank = same;\n";
|
||||
List.iter (fun (Expr.Assign (v, x)) ->
|
||||
List.iter (fun y ->
|
||||
if (Variable.is_locative y) then print("\t" ^ (vs y) ^ ";\n"))
|
||||
(Expr.find_vars x))
|
||||
alist;
|
||||
print "}\n";
|
||||
|
||||
(* all output nodes have the same rank *)
|
||||
print "{ rank = same;\n";
|
||||
List.iter (fun (Expr.Assign (v, x)) ->
|
||||
if (Variable.is_locative v) then print("\t" ^ (vs v) ^ ";\n"))
|
||||
alist;
|
||||
print "}\n";
|
||||
|
||||
(* edges *)
|
||||
List.iter (fun (Expr.Assign (v, x)) ->
|
||||
List.iter (fun y -> print("\t" ^ (vs y) ^ " -> " ^ (vs v) ^ ";\n"))
|
||||
(Expr.find_vars x))
|
||||
alist;
|
||||
|
||||
print "}\n";
|
||||
end
|
||||
|
||||
24
fftw-3.3.10/genfft/to_alist.mli
Normal file
24
fftw-3.3.10/genfft/to_alist.mli
Normal file
@@ -0,0 +1,24 @@
|
||||
(*
|
||||
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*)
|
||||
|
||||
val to_assignments : Expr.expr list -> Expr.assignment list
|
||||
val dump : (string -> unit) -> Expr.assignment list -> unit
|
||||
val good_for_fma : Expr.expr * Expr.expr -> bool
|
||||
152
fftw-3.3.10/genfft/trig.ml
Normal file
152
fftw-3.3.10/genfft/trig.ml
Normal file
@@ -0,0 +1,152 @@
|
||||
(*
|
||||
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*)
|
||||
|
||||
(* trigonometric transforms *)
|
||||
open Util
|
||||
|
||||
(* DFT of real input *)
|
||||
let rdft sign n input =
|
||||
Fft.dft sign n (Complex.real @@ input)
|
||||
|
||||
(* DFT of hermitian input *)
|
||||
let hdft sign n input =
|
||||
Fft.dft sign n (Complex.hermitian n input)
|
||||
|
||||
(* DFT real transform of vectors of two real numbers,
|
||||
multiplication by (NaN I), and summation *)
|
||||
let dft_via_rdft sign n input =
|
||||
let f = rdft sign n input
|
||||
in fun i ->
|
||||
Complex.plus
|
||||
[Complex.real (f i);
|
||||
Complex.times (Complex.nan Expr.I) (Complex.imag (f i))]
|
||||
|
||||
(* Discrete Hartley Transform *)
|
||||
let dht sign n input =
|
||||
let f = Fft.dft sign n (Complex.real @@ input) in
|
||||
(fun i ->
|
||||
Complex.plus [Complex.real (f i); Complex.imag (f i)])
|
||||
|
||||
let trigI n input =
|
||||
let twon = 2 * n in
|
||||
let input' = Complex.hermitian twon input
|
||||
in
|
||||
Fft.dft 1 twon input'
|
||||
|
||||
let interleave_zero input = fun i ->
|
||||
if (i mod 2) == 0
|
||||
then Complex.zero
|
||||
else
|
||||
input ((i - 1) / 2)
|
||||
|
||||
let trigII n input =
|
||||
let fourn = 4 * n in
|
||||
let input' = Complex.hermitian fourn (interleave_zero input)
|
||||
in
|
||||
Fft.dft 1 fourn input'
|
||||
|
||||
let trigIII n input =
|
||||
let fourn = 4 * n in
|
||||
let twon = 2 * n in
|
||||
let input' = Complex.hermitian fourn
|
||||
(fun i ->
|
||||
if (i == 0) then
|
||||
Complex.real (input 0)
|
||||
else if (i == twon) then
|
||||
Complex.uminus (Complex.real (input 0))
|
||||
else
|
||||
Complex.antihermitian twon input i)
|
||||
in
|
||||
let dft = Fft.dft 1 fourn input'
|
||||
in fun k -> dft (2 * k + 1)
|
||||
|
||||
let zero_extend n input = fun i ->
|
||||
if (i >= 0 && i < n)
|
||||
then input i
|
||||
else Complex.zero
|
||||
|
||||
let trigIV n input =
|
||||
let fourn = 4 * n
|
||||
and eightn = 8 * n in
|
||||
let input' = Complex.hermitian eightn
|
||||
(zero_extend fourn (Complex.antihermitian fourn
|
||||
(interleave_zero input)))
|
||||
in
|
||||
let dft = Fft.dft 1 eightn input'
|
||||
in fun k -> dft (2 * k + 1)
|
||||
|
||||
let make_dct scale nshift trig =
|
||||
fun n input ->
|
||||
trig (n - nshift) (Complex.real @@ (Complex.times scale) @@
|
||||
(zero_extend n input))
|
||||
(*
|
||||
* DCT-I: y[k] = sum x[j] cos(pi * j * k / n)
|
||||
*)
|
||||
let dctI = make_dct Complex.one 1 trigI
|
||||
|
||||
(*
|
||||
* DCT-II: y[k] = sum x[j] cos(pi * (j + 1/2) * k / n)
|
||||
*)
|
||||
let dctII = make_dct Complex.one 0 trigII
|
||||
|
||||
(*
|
||||
* DCT-III: y[k] = sum x[j] cos(pi * j * (k + 1/2) / n)
|
||||
*)
|
||||
let dctIII = make_dct Complex.half 0 trigIII
|
||||
|
||||
(*
|
||||
* DCT-IV y[k] = sum x[j] cos(pi * (j + 1/2) * (k + 1/2) / n)
|
||||
*)
|
||||
let dctIV = make_dct Complex.half 0 trigIV
|
||||
|
||||
let shift s input = fun i -> input (i - s)
|
||||
|
||||
(* DST-x input := TRIG-x (input / i) *)
|
||||
let make_dst scale nshift kshift jshift trig =
|
||||
fun n input ->
|
||||
Complex.real @@
|
||||
(shift (- jshift)
|
||||
(trig (n + nshift) (Complex.uminus @@
|
||||
(Complex.times Complex.i) @@
|
||||
(Complex.times scale) @@
|
||||
Complex.real @@
|
||||
(shift kshift (zero_extend n input)))))
|
||||
|
||||
(*
|
||||
* DST-I: y[k] = sum x[j] sin(pi * j * k / n)
|
||||
*)
|
||||
let dstI = make_dst Complex.one 1 1 1 trigI
|
||||
|
||||
(*
|
||||
* DST-II: y[k] = sum x[j] sin(pi * (j + 1/2) * k / n)
|
||||
*)
|
||||
let dstII = make_dst Complex.one 0 0 1 trigII
|
||||
|
||||
(*
|
||||
* DST-III: y[k] = sum x[j] sin(pi * j * (k + 1/2) / n)
|
||||
*)
|
||||
let dstIII = make_dst Complex.half 0 1 0 trigIII
|
||||
|
||||
(*
|
||||
* DST-IV y[k] = sum x[j] sin(pi * (j + 1/2) * (k + 1/2) / n)
|
||||
*)
|
||||
let dstIV = make_dst Complex.half 0 0 0 trigIV
|
||||
|
||||
35
fftw-3.3.10/genfft/trig.mli
Normal file
35
fftw-3.3.10/genfft/trig.mli
Normal file
@@ -0,0 +1,35 @@
|
||||
(*
|
||||
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*)
|
||||
|
||||
val rdft : int -> int -> Complex.signal -> Complex.signal
|
||||
val hdft : int -> int -> Complex.signal -> Complex.signal
|
||||
val dft_via_rdft : int -> int -> Complex.signal -> Complex.signal
|
||||
val dht : int -> int -> Complex.signal -> Complex.signal
|
||||
|
||||
val dctI : int -> Complex.signal -> Complex.signal
|
||||
val dctII : int -> Complex.signal -> Complex.signal
|
||||
val dctIII : int -> Complex.signal -> Complex.signal
|
||||
val dctIV : int -> Complex.signal -> Complex.signal
|
||||
|
||||
val dstI : int -> Complex.signal -> Complex.signal
|
||||
val dstII : int -> Complex.signal -> Complex.signal
|
||||
val dstIII : int -> Complex.signal -> Complex.signal
|
||||
val dstIV : int -> Complex.signal -> Complex.signal
|
||||
188
fftw-3.3.10/genfft/twiddle.ml
Normal file
188
fftw-3.3.10/genfft/twiddle.ml
Normal file
@@ -0,0 +1,188 @@
|
||||
(*
|
||||
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*)
|
||||
|
||||
(* policies for loading/computing twiddle factors *)
|
||||
open Complex
|
||||
open Util
|
||||
|
||||
type twop = TW_FULL | TW_CEXP | TW_NEXT
|
||||
|
||||
let optostring = function
|
||||
| TW_CEXP -> "TW_CEXP"
|
||||
| TW_NEXT -> "TW_NEXT"
|
||||
| TW_FULL -> "TW_FULL"
|
||||
|
||||
type twinstr = (twop * int * int)
|
||||
|
||||
let rec unroll_twfull l = match l with
|
||||
| [] -> []
|
||||
| (TW_FULL, v, n) :: b ->
|
||||
(forall [] cons 1 n (fun i -> (TW_CEXP, v, i)))
|
||||
@ unroll_twfull b
|
||||
| a :: b -> a :: unroll_twfull b
|
||||
|
||||
let twinstr_to_c_string l =
|
||||
let one (op, a, b) = Printf.sprintf "{ %s, %d, %d }" (optostring op) a b
|
||||
in let rec loop first = function
|
||||
| [] -> ""
|
||||
| a :: b -> (if first then "\n" else ",\n") ^ (one a) ^ (loop false b)
|
||||
in "{" ^ (loop true l) ^ "}"
|
||||
|
||||
let twinstr_to_simd_string vl l =
|
||||
let one sep = function
|
||||
| (TW_NEXT, 1, 0) -> sep ^ "{TW_NEXT, " ^ vl ^ ", 0}"
|
||||
| (TW_NEXT, _, _) -> failwith "twinstr_to_simd_string"
|
||||
| (TW_CEXP, v, b) -> sep ^ (Printf.sprintf "VTW(%d,%d)" v b)
|
||||
| _ -> failwith "twinstr_to_simd_string"
|
||||
in let rec loop first = function
|
||||
| [] -> ""
|
||||
| a :: b -> (one (if first then "\n" else ",\n") a) ^ (loop false b)
|
||||
in "{" ^ (loop true (unroll_twfull l)) ^ "}"
|
||||
|
||||
let rec pow m n =
|
||||
if (n = 0) then 1
|
||||
else m * pow m (n - 1)
|
||||
|
||||
let rec is_pow m n =
|
||||
n = 1 || ((n mod m) = 0 && is_pow m (n / m))
|
||||
|
||||
let rec log m n = if n = 1 then 0 else 1 + log m (n / m)
|
||||
|
||||
let rec largest_power_smaller_than m i =
|
||||
if (is_pow m i) then i
|
||||
else largest_power_smaller_than m (i - 1)
|
||||
|
||||
let rec smallest_power_larger_than m i =
|
||||
if (is_pow m i) then i
|
||||
else smallest_power_larger_than m (i + 1)
|
||||
|
||||
let rec_array n f =
|
||||
let g = ref (fun i -> Complex.zero) in
|
||||
let a = Array.init n (fun i -> lazy (!g i)) in
|
||||
let h i = f (fun i -> Lazy.force a.(i)) i in
|
||||
begin
|
||||
g := h;
|
||||
h
|
||||
end
|
||||
|
||||
|
||||
let ctimes use_complex_arith a b =
|
||||
if use_complex_arith then
|
||||
Complex.ctimes a b
|
||||
else
|
||||
Complex.times a b
|
||||
|
||||
let ctimesj use_complex_arith a b =
|
||||
if use_complex_arith then
|
||||
Complex.ctimesj a b
|
||||
else
|
||||
Complex.times (Complex.conj a) b
|
||||
|
||||
let make_bytwiddle sign use_complex_arith g f i =
|
||||
if i = 0 then
|
||||
f i
|
||||
else if sign = 1 then
|
||||
ctimes use_complex_arith (g i) (f i)
|
||||
else
|
||||
ctimesj use_complex_arith (g i) (f i)
|
||||
|
||||
(* various policies for computing/loading twiddle factors *)
|
||||
|
||||
let twiddle_policy_load_all v use_complex_arith =
|
||||
let bytwiddle n sign w f =
|
||||
make_bytwiddle sign use_complex_arith (fun i -> w (i - 1)) f
|
||||
and twidlen n = 2 * (n - 1)
|
||||
and twdesc r = [(TW_FULL, v, r);(TW_NEXT, 1, 0)]
|
||||
in bytwiddle, twidlen, twdesc
|
||||
|
||||
(*
|
||||
* if i is a power of two, then load w (log i)
|
||||
* else let x = largest power of 2 less than i in
|
||||
* let y = i - x in
|
||||
* compute w^{x+y} = w^x * w^y
|
||||
*)
|
||||
let twiddle_policy_log2 v use_complex_arith =
|
||||
let bytwiddle n sign w f =
|
||||
let g = rec_array n (fun self i ->
|
||||
if i = 0 then Complex.one
|
||||
else if is_pow 2 i then w (log 2 i)
|
||||
else let x = largest_power_smaller_than 2 i in
|
||||
let y = i - x in
|
||||
ctimes use_complex_arith (self x) (self y))
|
||||
in make_bytwiddle sign use_complex_arith g f
|
||||
and twidlen n = 2 * (log 2 (largest_power_smaller_than 2 (2 * n - 1)))
|
||||
and twdesc n =
|
||||
(List.flatten
|
||||
(List.map
|
||||
(fun i ->
|
||||
if i > 0 && is_pow 2 i then
|
||||
[TW_CEXP, v, i]
|
||||
else
|
||||
[])
|
||||
(iota n)))
|
||||
@ [(TW_NEXT, 1, 0)]
|
||||
in bytwiddle, twidlen, twdesc
|
||||
|
||||
let twiddle_policy_log3 v use_complex_arith =
|
||||
let rec terms_needed i pi s n =
|
||||
if (s >= n - 1) then i
|
||||
else terms_needed (i + 1) (3 * pi) (s + pi) n
|
||||
in
|
||||
let rec bytwiddle n sign w f =
|
||||
let nterms = terms_needed 0 1 0 n in
|
||||
let maxterm = pow 3 (nterms - 1) in
|
||||
let g = rec_array (3 * n) (fun self i ->
|
||||
if i = 0 then Complex.one
|
||||
else if is_pow 3 i then w (log 3 i)
|
||||
else if i = (n - 1) && maxterm >= n then
|
||||
w (nterms - 1)
|
||||
else let x = smallest_power_larger_than 3 i in
|
||||
if (i + i >= x) then
|
||||
let x = min x (n - 1) in
|
||||
ctimesj use_complex_arith (self (x - i)) (self x)
|
||||
else let x = largest_power_smaller_than 3 i in
|
||||
ctimes use_complex_arith (self (i - x)) (self x))
|
||||
in make_bytwiddle sign use_complex_arith g f
|
||||
and twidlen n = 2 * (terms_needed 0 1 0 n)
|
||||
and twdesc n =
|
||||
(List.map
|
||||
(fun i ->
|
||||
let x = min (pow 3 i) (n - 1) in
|
||||
TW_CEXP, v, x)
|
||||
(iota ((twidlen n) / 2)))
|
||||
@ [(TW_NEXT, 1, 0)]
|
||||
in bytwiddle, twidlen, twdesc
|
||||
|
||||
let current_twiddle_policy = ref twiddle_policy_load_all
|
||||
|
||||
let twiddle_policy use_complex_arith =
|
||||
!current_twiddle_policy use_complex_arith
|
||||
|
||||
let set_policy x = Arg.Unit (fun () -> current_twiddle_policy := x)
|
||||
let set_policy_int x = Arg.Int (fun i -> current_twiddle_policy := x i)
|
||||
|
||||
let undocumented = " Undocumented twiddle policy"
|
||||
|
||||
let speclist = [
|
||||
"-twiddle-load-all", set_policy twiddle_policy_load_all, undocumented;
|
||||
"-twiddle-log2", set_policy twiddle_policy_log2, undocumented;
|
||||
"-twiddle-log3", set_policy twiddle_policy_log3, undocumented;
|
||||
]
|
||||
32
fftw-3.3.10/genfft/twiddle.mli
Normal file
32
fftw-3.3.10/genfft/twiddle.mli
Normal file
@@ -0,0 +1,32 @@
|
||||
(*
|
||||
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*)
|
||||
|
||||
val speclist : (string * Arg.spec * string) list
|
||||
|
||||
type twinstr
|
||||
|
||||
val twiddle_policy :
|
||||
int -> bool ->
|
||||
(int -> int -> (int -> Complex.expr) -> (int -> Complex.expr) ->
|
||||
int -> Complex.expr) *(int -> int) * (int -> twinstr list)
|
||||
|
||||
val twinstr_to_c_string : twinstr list -> string
|
||||
val twinstr_to_simd_string : string -> twinstr list -> string
|
||||
35
fftw-3.3.10/genfft/unique.ml
Normal file
35
fftw-3.3.10/genfft/unique.ml
Normal file
@@ -0,0 +1,35 @@
|
||||
(*
|
||||
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*)
|
||||
|
||||
(* repository of unique tokens *)
|
||||
|
||||
type unique = Unique of int
|
||||
|
||||
let make =
|
||||
let id = ref 0 in
|
||||
fun () -> begin
|
||||
id := !id + 1;
|
||||
Unique !id
|
||||
end
|
||||
|
||||
let same (Unique a) (Unique b) =
|
||||
a = b
|
||||
|
||||
24
fftw-3.3.10/genfft/unique.mli
Normal file
24
fftw-3.3.10/genfft/unique.mli
Normal file
@@ -0,0 +1,24 @@
|
||||
(*
|
||||
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*)
|
||||
|
||||
type unique
|
||||
val make : unit -> unique
|
||||
val same : unique -> unique -> bool
|
||||
176
fftw-3.3.10/genfft/util.ml
Normal file
176
fftw-3.3.10/genfft/util.ml
Normal file
@@ -0,0 +1,176 @@
|
||||
(*
|
||||
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*)
|
||||
|
||||
(* various utility functions *)
|
||||
open List
|
||||
open Unix
|
||||
|
||||
(*****************************************
|
||||
* Integer operations
|
||||
*****************************************)
|
||||
(* fint the inverse of n modulo m *)
|
||||
let invmod n m =
|
||||
let rec loop i =
|
||||
if ((i * n) mod m == 1) then i
|
||||
else loop (i + 1)
|
||||
in
|
||||
loop 1
|
||||
|
||||
(* Yooklid's algorithm *)
|
||||
let rec gcd n m =
|
||||
if (n > m)
|
||||
then gcd m n
|
||||
else
|
||||
let r = m mod n
|
||||
in
|
||||
if (r == 0) then n
|
||||
else gcd r n
|
||||
|
||||
(* reduce the fraction m/n to lowest terms, modulo factors of n/n *)
|
||||
let lowest_terms n m =
|
||||
if (m mod n == 0) then
|
||||
(1,0)
|
||||
else
|
||||
let nn = (abs n) in let mm = m * (n / nn)
|
||||
in let mpos =
|
||||
if (mm > 0) then (mm mod nn)
|
||||
else (mm + (1 + (abs mm) / nn) * nn) mod nn
|
||||
and d = gcd nn (abs mm)
|
||||
in (nn / d, mpos / d)
|
||||
|
||||
(* find a generator for the multiplicative group mod p
|
||||
(where p must be prime for a generator to exist!!) *)
|
||||
|
||||
exception No_Generator
|
||||
|
||||
let find_generator p =
|
||||
let rec period x prod =
|
||||
if (prod == 1) then 1
|
||||
else 1 + (period x (prod * x mod p))
|
||||
in let rec findgen x =
|
||||
if (x == 0) then raise No_Generator
|
||||
else if ((period x x) == (p - 1)) then x
|
||||
else findgen ((x + 1) mod p)
|
||||
in findgen 1
|
||||
|
||||
(* raise x to a power n modulo p (requires n > 0) (in principle,
|
||||
negative powers would be fine, provided that x and p are relatively
|
||||
prime...we don't need this functionality, though) *)
|
||||
|
||||
exception Negative_Power
|
||||
|
||||
let rec pow_mod x n p =
|
||||
if (n == 0) then 1
|
||||
else if (n < 0) then raise Negative_Power
|
||||
else if (n mod 2 == 0) then pow_mod (x * x mod p) (n / 2) p
|
||||
else x * (pow_mod x (n - 1) p) mod p
|
||||
|
||||
(******************************************
|
||||
* auxiliary functions
|
||||
******************************************)
|
||||
let rec forall id combiner a b f =
|
||||
if (a >= b) then id
|
||||
else combiner (f a) (forall id combiner (a + 1) b f)
|
||||
|
||||
let sum_list l = fold_right (+) l 0
|
||||
let max_list l = fold_right (max) l (-999999)
|
||||
let min_list l = fold_right (min) l 999999
|
||||
let count pred = fold_left
|
||||
(fun a elem -> if (pred elem) then 1 + a else a) 0
|
||||
let remove elem = List.filter (fun e -> (e != elem))
|
||||
let cons a b = a :: b
|
||||
let null = function
|
||||
[] -> true
|
||||
| _ -> false
|
||||
let for_list l f = List.iter f l
|
||||
let rmap l f = List.map f l
|
||||
|
||||
(* functional composition *)
|
||||
let (@@) f g x = f (g x)
|
||||
|
||||
let forall_flat a b = forall [] (@) a b
|
||||
|
||||
let identity x = x
|
||||
|
||||
let rec minimize f = function
|
||||
[] -> None
|
||||
| elem :: rest ->
|
||||
match minimize f rest with
|
||||
None -> Some elem
|
||||
| Some x -> if (f x) >= (f elem) then Some elem else Some x
|
||||
|
||||
|
||||
let rec find_elem condition = function
|
||||
[] -> None
|
||||
| elem :: rest ->
|
||||
if condition elem then
|
||||
Some elem
|
||||
else
|
||||
find_elem condition rest
|
||||
|
||||
|
||||
(* find x, x >= a, such that (p x) is true *)
|
||||
let rec suchthat a pred =
|
||||
if (pred a) then a else suchthat (a + 1) pred
|
||||
|
||||
(* print an information message *)
|
||||
let info string =
|
||||
if !Magic.verbose then begin
|
||||
let now = Unix.times ()
|
||||
and pid = Unix.getpid () in
|
||||
prerr_string ((string_of_int pid) ^ ": " ^
|
||||
"at t = " ^ (string_of_float now.tms_utime) ^ " : ");
|
||||
prerr_string (string ^ "\n");
|
||||
flush Pervasives.stderr;
|
||||
end
|
||||
|
||||
(* iota n produces the list [0; 1; ...; n - 1] *)
|
||||
let iota n = forall [] cons 0 n identity
|
||||
|
||||
(* interval a b produces the list [a; a + 1; ...; b - 1] *)
|
||||
let interval a b = List.map ((+) a) (iota (b - a))
|
||||
|
||||
(*
|
||||
* freeze a function, i.e., compute it only once on demand, and
|
||||
* cache it into an array.
|
||||
*)
|
||||
let array n f =
|
||||
let a = Array.init n (fun i -> lazy (f i))
|
||||
in fun i -> Lazy.force a.(i)
|
||||
|
||||
|
||||
let rec take n l =
|
||||
match (n, l) with
|
||||
(0, _) -> []
|
||||
| (n, (a :: b)) -> a :: (take (n - 1) b)
|
||||
| _ -> failwith "take"
|
||||
|
||||
let rec drop n l =
|
||||
match (n, l) with
|
||||
(0, _) -> l
|
||||
| (n, (_ :: b)) -> drop (n - 1) b
|
||||
| _ -> failwith "drop"
|
||||
|
||||
|
||||
let either a b =
|
||||
match a with
|
||||
Some x -> x
|
||||
| _ -> b
|
||||
49
fftw-3.3.10/genfft/util.mli
Normal file
49
fftw-3.3.10/genfft/util.mli
Normal file
@@ -0,0 +1,49 @@
|
||||
(*
|
||||
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*)
|
||||
|
||||
val invmod : int -> int -> int
|
||||
val gcd : int -> int -> int
|
||||
val lowest_terms : int -> int -> int * int
|
||||
val find_generator : int -> int
|
||||
val pow_mod : int -> int -> int -> int
|
||||
val forall : 'a -> ('b -> 'a -> 'a) -> int -> int -> (int -> 'b) -> 'a
|
||||
val sum_list : int list -> int
|
||||
val max_list : int list -> int
|
||||
val min_list : int list -> int
|
||||
val count : ('a -> bool) -> 'a list -> int
|
||||
val remove : 'a -> 'a list -> 'a list
|
||||
val for_list : 'a list -> ('a -> unit) -> unit
|
||||
val rmap : 'a list -> ('a -> 'b) -> 'b list
|
||||
val cons : 'a -> 'a list -> 'a list
|
||||
val null : 'a list -> bool
|
||||
val (@@) : ('a -> 'b) -> ('c -> 'a) -> 'c -> 'b
|
||||
val forall_flat : int -> int -> (int -> 'a list) -> 'a list
|
||||
val identity : 'a -> 'a
|
||||
val minimize : ('a -> 'b) -> 'a list -> 'a option
|
||||
val find_elem : ('a -> bool) -> 'a list -> 'a option
|
||||
val suchthat : int -> (int -> bool) -> int
|
||||
val info : string -> unit
|
||||
val iota : int -> int list
|
||||
val interval : int -> int -> int list
|
||||
val array : int -> (int -> 'a) -> int -> 'a
|
||||
val take : int -> 'a list -> 'a list
|
||||
val drop : int -> 'a list -> 'a list
|
||||
val either : 'a option -> 'a -> 'a
|
||||
108
fftw-3.3.10/genfft/variable.ml
Normal file
108
fftw-3.3.10/genfft/variable.ml
Normal file
@@ -0,0 +1,108 @@
|
||||
(*
|
||||
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*)
|
||||
|
||||
type variable =
|
||||
(* temporary variables generated automatically *)
|
||||
| Temporary of int
|
||||
(* memory locations, e.g., array elements *)
|
||||
| Locative of (Unique.unique * Unique.unique *
|
||||
(int -> string) * int * string)
|
||||
(* constant values, e.g., twiddle factors *)
|
||||
| Constant of (Unique.unique * string)
|
||||
|
||||
let hash v = Hashtbl.hash v
|
||||
|
||||
let same a b = (a == b)
|
||||
|
||||
let is_constant = function
|
||||
| Constant _ -> true
|
||||
| _ -> false
|
||||
|
||||
let is_temporary = function
|
||||
| Temporary _ -> true
|
||||
| _ -> false
|
||||
|
||||
let is_locative = function
|
||||
| Locative _ -> true
|
||||
| _ -> false
|
||||
|
||||
let same_location a b =
|
||||
match (a, b) with
|
||||
| (Locative (location_a, _, _, _, _), Locative (location_b, _, _, _, _)) ->
|
||||
Unique.same location_a location_b
|
||||
| _ -> false
|
||||
|
||||
let same_class a b =
|
||||
match (a, b) with
|
||||
| (Locative (_, class_a, _, _, _), Locative (_, class_b, _, _, _)) ->
|
||||
Unique.same class_a class_b
|
||||
| (Constant (class_a, _), Constant (class_b, _)) ->
|
||||
Unique.same class_a class_b
|
||||
| _ -> false
|
||||
|
||||
let make_temporary =
|
||||
let tmp_count = ref 0
|
||||
in fun () -> begin
|
||||
tmp_count := !tmp_count + 1;
|
||||
Temporary !tmp_count
|
||||
end
|
||||
|
||||
let make_constant class_token name =
|
||||
Constant (class_token, name)
|
||||
|
||||
let make_locative location_token class_token name i vs =
|
||||
Locative (location_token, class_token, name, i, vs)
|
||||
|
||||
let vstride_of_locative = function
|
||||
| Locative (_, _, _, _, vs) -> vs
|
||||
| _ -> failwith "vstride_of_locative"
|
||||
|
||||
(* special naming conventions for variables *)
|
||||
let rec base62_of_int k =
|
||||
let x = k mod 62
|
||||
and y = k / 62 in
|
||||
let c =
|
||||
if x < 10 then
|
||||
Char.chr (x + Char.code '0')
|
||||
else if x < 36 then
|
||||
Char.chr (x + Char.code 'a' - 10)
|
||||
else
|
||||
Char.chr (x + Char.code 'A' - 36)
|
||||
in
|
||||
let s = String.make 1 c in
|
||||
let r = if y == 0 then "" else base62_of_int y in
|
||||
r ^ s
|
||||
|
||||
let varname_of_int k =
|
||||
if !Magic.compact then
|
||||
base62_of_int k
|
||||
else
|
||||
string_of_int k
|
||||
|
||||
let unparse = function
|
||||
| Temporary k -> "T" ^ (varname_of_int k)
|
||||
| Constant (_, name) -> name
|
||||
| Locative (_, _, name, i, _) -> name i
|
||||
|
||||
let unparse_for_alignment m = function
|
||||
| Locative (_, _, name, i, _) -> name (i mod m)
|
||||
| _ -> failwith "unparse_for_alignment"
|
||||
|
||||
38
fftw-3.3.10/genfft/variable.mli
Normal file
38
fftw-3.3.10/genfft/variable.mli
Normal file
@@ -0,0 +1,38 @@
|
||||
(*
|
||||
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*)
|
||||
|
||||
type variable
|
||||
|
||||
val hash : variable -> int
|
||||
val same : variable -> variable -> bool
|
||||
val is_constant : variable -> bool
|
||||
val is_temporary : variable -> bool
|
||||
val is_locative : variable -> bool
|
||||
val same_location : variable -> variable -> bool
|
||||
val same_class : variable -> variable -> bool
|
||||
val make_temporary : unit -> variable
|
||||
val make_constant : Unique.unique -> string -> variable
|
||||
val make_locative :
|
||||
Unique.unique -> Unique.unique -> (int -> string) ->
|
||||
int -> string -> variable
|
||||
val unparse : variable -> string
|
||||
val unparse_for_alignment : int -> variable -> string
|
||||
val vstride_of_locative : variable -> string
|
||||
Reference in New Issue
Block a user