This commit is contained in:
2025-07-12 12:17:44 +03:00
parent c759f60ff7
commit 792e1b937a
3507 changed files with 492613 additions and 0 deletions

View File

@@ -0,0 +1,25 @@
# this makefile requires GNU make.
EXTRA_DIST = algsimp.ml annotate.ml assoctable.ml c.ml complex.ml \
conv.ml dag.ml expr.ml fft.ml gen_hc2c.ml gen_hc2cdft.ml \
gen_hc2cdft_c.ml gen_hc2hc.ml gen_r2cb.ml gen_mdct.ml gen_notw.ml \
gen_notw_c.ml gen_r2cf.ml gen_r2r.ml gen_twiddle.ml gen_twiddle_c.ml \
gen_twidsq.ml gen_twidsq_c.ml genutil.ml littlesimp.ml magic.ml \
monads.ml number.ml oracle.ml schedule.ml simd.ml simdmagic.ml \
to_alist.ml trig.ml twiddle.ml unique.ml util.ml variable.ml \
algsimp.mli annotate.mli assoctable.mli c.mli complex.mli conv.mli \
dag.mli expr.mli fft.mli littlesimp.mli number.mli oracle.mli \
schedule.mli simd.mli to_alist.mli trig.mli twiddle.mli unique.mli \
util.mli variable.mli
GENFFT_NATIVE=gen_notw.native gen_notw_c.native gen_twiddle.native \
gen_twiddle_c.native gen_twidsq.native gen_twidsq_c.native \
gen_r2r.native gen_r2cf.native gen_r2cb.native gen_hc2c.native \
gen_hc2cdft.native gen_hc2cdft_c.native gen_hc2hc.native \
gen_mdct.native
all-local::
$(OCAMLBUILD) -classic-display -libs unix,nums $(GENFFT_NATIVE)
maintainer-clean-local::
$(OCAMLBUILD) -classic-display -clean

View File

@@ -0,0 +1,509 @@
# Makefile.in generated by automake 1.16.3 from Makefile.am.
# @configure_input@
# Copyright (C) 1994-2020 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE.
@SET_MAKE@
# this makefile requires GNU make.
VPATH = @srcdir@
am__is_gnu_make = { \
if test -z '$(MAKELEVEL)'; then \
false; \
elif test -n '$(MAKE_HOST)'; then \
true; \
elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
true; \
else \
false; \
fi; \
}
am__make_running_with_option = \
case $${target_option-} in \
?) ;; \
*) echo "am__make_running_with_option: internal error: invalid" \
"target option '$${target_option-}' specified" >&2; \
exit 1;; \
esac; \
has_opt=no; \
sane_makeflags=$$MAKEFLAGS; \
if $(am__is_gnu_make); then \
sane_makeflags=$$MFLAGS; \
else \
case $$MAKEFLAGS in \
*\\[\ \ ]*) \
bs=\\; \
sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
| sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \
esac; \
fi; \
skip_next=no; \
strip_trailopt () \
{ \
flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
}; \
for flg in $$sane_makeflags; do \
test $$skip_next = yes && { skip_next=no; continue; }; \
case $$flg in \
*=*|--*) continue;; \
-*I) strip_trailopt 'I'; skip_next=yes;; \
-*I?*) strip_trailopt 'I';; \
-*O) strip_trailopt 'O'; skip_next=yes;; \
-*O?*) strip_trailopt 'O';; \
-*l) strip_trailopt 'l'; skip_next=yes;; \
-*l?*) strip_trailopt 'l';; \
-[dEDm]) skip_next=yes;; \
-[JT]) skip_next=yes;; \
esac; \
case $$flg in \
*$$target_option*) has_opt=yes; break;; \
esac; \
done; \
test $$has_opt = yes
am__make_dryrun = (target_option=n; $(am__make_running_with_option))
am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
pkgdatadir = $(datadir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
pkglibexecdir = $(libexecdir)/@PACKAGE@
am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
install_sh_DATA = $(install_sh) -c -m 644
install_sh_PROGRAM = $(install_sh) -c
install_sh_SCRIPT = $(install_sh) -c
INSTALL_HEADER = $(INSTALL_DATA)
transform = $(program_transform_name)
NORMAL_INSTALL = :
PRE_INSTALL = :
POST_INSTALL = :
NORMAL_UNINSTALL = :
PRE_UNINSTALL = :
POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
subdir = genfft
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/m4/acx_mpi.m4 \
$(top_srcdir)/m4/acx_pthread.m4 \
$(top_srcdir)/m4/ax_cc_maxopt.m4 \
$(top_srcdir)/m4/ax_check_compiler_flags.m4 \
$(top_srcdir)/m4/ax_compiler_vendor.m4 \
$(top_srcdir)/m4/ax_gcc_aligns_stack.m4 \
$(top_srcdir)/m4/ax_gcc_version.m4 \
$(top_srcdir)/m4/ax_openmp.m4 $(top_srcdir)/m4/libtool.m4 \
$(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \
$(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
$(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON)
mkinstalldirs = $(install_sh) -d
CONFIG_HEADER = $(top_builddir)/config.h
CONFIG_CLEAN_FILES =
CONFIG_CLEAN_VPATH_FILES =
AM_V_P = $(am__v_P_@AM_V@)
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
am__v_P_0 = false
am__v_P_1 = :
AM_V_GEN = $(am__v_GEN_@AM_V@)
am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
am__v_GEN_0 = @echo " GEN " $@;
am__v_GEN_1 =
AM_V_at = $(am__v_at_@AM_V@)
am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
am__v_at_0 = @
am__v_at_1 =
SOURCES =
DIST_SOURCES =
am__can_run_installinfo = \
case $$AM_UPDATE_INFO_DIR in \
n|no|NO) false;; \
*) (install-info --version) >/dev/null 2>&1;; \
esac
am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
am__DIST_COMMON = $(srcdir)/Makefile.in
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
ACLOCAL = @ACLOCAL@
ALLOCA = @ALLOCA@
ALTIVEC_CFLAGS = @ALTIVEC_CFLAGS@
AMTAR = @AMTAR@
AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
AR = @AR@
AS = @AS@
AUTOCONF = @AUTOCONF@
AUTOHEADER = @AUTOHEADER@
AUTOMAKE = @AUTOMAKE@
AVX2_CFLAGS = @AVX2_CFLAGS@
AVX512_CFLAGS = @AVX512_CFLAGS@
AVX_128_FMA_CFLAGS = @AVX_128_FMA_CFLAGS@
AVX_CFLAGS = @AVX_CFLAGS@
AWK = @AWK@
CC = @CC@
CCDEPMODE = @CCDEPMODE@
CFLAGS = @CFLAGS@
CHECK_PL_OPTS = @CHECK_PL_OPTS@
CPP = @CPP@
CPPFLAGS = @CPPFLAGS@
CYGPATH_W = @CYGPATH_W@
C_FFTW_R2R_KIND = @C_FFTW_R2R_KIND@
C_MPI_FINT = @C_MPI_FINT@
DEFS = @DEFS@
DEPDIR = @DEPDIR@
DLLTOOL = @DLLTOOL@
DSYMUTIL = @DSYMUTIL@
DUMPBIN = @DUMPBIN@
ECHO_C = @ECHO_C@
ECHO_N = @ECHO_N@
ECHO_T = @ECHO_T@
EGREP = @EGREP@
EXEEXT = @EXEEXT@
F77 = @F77@
FFLAGS = @FFLAGS@
FGREP = @FGREP@
FLIBS = @FLIBS@
GREP = @GREP@
INDENT = @INDENT@
INSTALL = @INSTALL@
INSTALL_DATA = @INSTALL_DATA@
INSTALL_PROGRAM = @INSTALL_PROGRAM@
INSTALL_SCRIPT = @INSTALL_SCRIPT@
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
KCVI_CFLAGS = @KCVI_CFLAGS@
LD = @LD@
LDFLAGS = @LDFLAGS@
LIBOBJS = @LIBOBJS@
LIBQUADMATH = @LIBQUADMATH@
LIBS = @LIBS@
LIBTOOL = @LIBTOOL@
LIPO = @LIPO@
LN_S = @LN_S@
LTLIBOBJS = @LTLIBOBJS@
LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@
MAINT = @MAINT@
MAKEINFO = @MAKEINFO@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MPICC = @MPICC@
MPILIBS = @MPILIBS@
MPIRUN = @MPIRUN@
NEON_CFLAGS = @NEON_CFLAGS@
NM = @NM@
NMEDIT = @NMEDIT@
OBJDUMP = @OBJDUMP@
OBJEXT = @OBJEXT@
OCAMLBUILD = @OCAMLBUILD@
OPENMP_CFLAGS = @OPENMP_CFLAGS@
OTOOL = @OTOOL@
OTOOL64 = @OTOOL64@
PACKAGE = @PACKAGE@
PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
PACKAGE_NAME = @PACKAGE_NAME@
PACKAGE_STRING = @PACKAGE_STRING@
PACKAGE_TARNAME = @PACKAGE_TARNAME@
PACKAGE_URL = @PACKAGE_URL@
PACKAGE_VERSION = @PACKAGE_VERSION@
PATH_SEPARATOR = @PATH_SEPARATOR@
POW_LIB = @POW_LIB@
PRECISION = @PRECISION@
PREC_SUFFIX = @PREC_SUFFIX@
PTHREAD_CC = @PTHREAD_CC@
PTHREAD_CFLAGS = @PTHREAD_CFLAGS@
PTHREAD_LIBS = @PTHREAD_LIBS@
RANLIB = @RANLIB@
SED = @SED@
SET_MAKE = @SET_MAKE@
SHARED_VERSION_INFO = @SHARED_VERSION_INFO@
SHELL = @SHELL@
SSE2_CFLAGS = @SSE2_CFLAGS@
STACK_ALIGN_CFLAGS = @STACK_ALIGN_CFLAGS@
STRIP = @STRIP@
THREADLIBS = @THREADLIBS@
VERSION = @VERSION@
VSX_CFLAGS = @VSX_CFLAGS@
abs_builddir = @abs_builddir@
abs_srcdir = @abs_srcdir@
abs_top_builddir = @abs_top_builddir@
abs_top_srcdir = @abs_top_srcdir@
ac_ct_AR = @ac_ct_AR@
ac_ct_CC = @ac_ct_CC@
ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
ac_ct_F77 = @ac_ct_F77@
acx_pthread_config = @acx_pthread_config@
am__include = @am__include@
am__leading_dot = @am__leading_dot@
am__quote = @am__quote@
am__tar = @am__tar@
am__untar = @am__untar@
bindir = @bindir@
build = @build@
build_alias = @build_alias@
build_cpu = @build_cpu@
build_os = @build_os@
build_vendor = @build_vendor@
builddir = @builddir@
datadir = @datadir@
datarootdir = @datarootdir@
docdir = @docdir@
dvidir = @dvidir@
exec_prefix = @exec_prefix@
host = @host@
host_alias = @host_alias@
host_cpu = @host_cpu@
host_os = @host_os@
host_vendor = @host_vendor@
htmldir = @htmldir@
includedir = @includedir@
infodir = @infodir@
install_sh = @install_sh@
libdir = @libdir@
libexecdir = @libexecdir@
localedir = @localedir@
localstatedir = @localstatedir@
mandir = @mandir@
mkdir_p = @mkdir_p@
oldincludedir = @oldincludedir@
pdfdir = @pdfdir@
prefix = @prefix@
program_transform_name = @program_transform_name@
psdir = @psdir@
runstatedir = @runstatedir@
sbindir = @sbindir@
sharedstatedir = @sharedstatedir@
srcdir = @srcdir@
sysconfdir = @sysconfdir@
target_alias = @target_alias@
top_build_prefix = @top_build_prefix@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
EXTRA_DIST = algsimp.ml annotate.ml assoctable.ml c.ml complex.ml \
conv.ml dag.ml expr.ml fft.ml gen_hc2c.ml gen_hc2cdft.ml \
gen_hc2cdft_c.ml gen_hc2hc.ml gen_r2cb.ml gen_mdct.ml gen_notw.ml \
gen_notw_c.ml gen_r2cf.ml gen_r2r.ml gen_twiddle.ml gen_twiddle_c.ml \
gen_twidsq.ml gen_twidsq_c.ml genutil.ml littlesimp.ml magic.ml \
monads.ml number.ml oracle.ml schedule.ml simd.ml simdmagic.ml \
to_alist.ml trig.ml twiddle.ml unique.ml util.ml variable.ml \
algsimp.mli annotate.mli assoctable.mli c.mli complex.mli conv.mli \
dag.mli expr.mli fft.mli littlesimp.mli number.mli oracle.mli \
schedule.mli simd.mli to_alist.mli trig.mli twiddle.mli unique.mli \
util.mli variable.mli
GENFFT_NATIVE = gen_notw.native gen_notw_c.native gen_twiddle.native \
gen_twiddle_c.native gen_twidsq.native gen_twidsq_c.native \
gen_r2r.native gen_r2cf.native gen_r2cb.native gen_hc2c.native \
gen_hc2cdft.native gen_hc2cdft_c.native gen_hc2hc.native \
gen_mdct.native
all: all-am
.SUFFIXES:
$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
*$$dep*) \
( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
&& { if test -f $@; then exit 0; else break; fi; }; \
exit 1;; \
esac; \
done; \
echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu genfft/Makefile'; \
$(am__cd) $(top_srcdir) && \
$(AUTOMAKE) --gnu genfft/Makefile
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
@case '$?' in \
*config.status*) \
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
*) \
echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \
cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \
esac;
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(am__aclocal_m4_deps):
mostlyclean-libtool:
-rm -f *.lo
clean-libtool:
-rm -rf .libs _libs
tags TAGS:
ctags CTAGS:
cscope cscopelist:
distdir: $(BUILT_SOURCES)
$(MAKE) $(AM_MAKEFLAGS) distdir-am
distdir-am: $(DISTFILES)
@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
list='$(DISTFILES)'; \
dist_files=`for file in $$list; do echo $$file; done | \
sed -e "s|^$$srcdirstrip/||;t" \
-e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
case $$dist_files in \
*/*) $(MKDIR_P) `echo "$$dist_files" | \
sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
sort -u` ;; \
esac; \
for file in $$dist_files; do \
if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
if test -d $$d/$$file; then \
dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
if test -d "$(distdir)/$$file"; then \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
else \
test -f "$(distdir)/$$file" \
|| cp -p $$d/$$file "$(distdir)/$$file" \
|| exit 1; \
fi; \
done
check-am: all-am
check: check-am
all-am: Makefile all-local
installdirs:
install: install-am
install-exec: install-exec-am
install-data: install-data-am
uninstall: uninstall-am
install-am: all-am
@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
installcheck: installcheck-am
install-strip:
if test -z '$(STRIP)'; then \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
install; \
else \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
"INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
fi
mostlyclean-generic:
clean-generic:
distclean-generic:
-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
maintainer-clean-generic:
@echo "This command is intended for maintainers to use"
@echo "it deletes files that may require special tools to rebuild."
clean: clean-am
clean-am: clean-generic clean-libtool mostlyclean-am
distclean: distclean-am
-rm -f Makefile
distclean-am: clean-am distclean-generic
dvi: dvi-am
dvi-am:
html: html-am
html-am:
info: info-am
info-am:
install-data-am:
install-dvi: install-dvi-am
install-dvi-am:
install-exec-am:
install-html: install-html-am
install-html-am:
install-info: install-info-am
install-info-am:
install-man:
install-pdf: install-pdf-am
install-pdf-am:
install-ps: install-ps-am
install-ps-am:
installcheck-am:
maintainer-clean: maintainer-clean-am
-rm -f Makefile
maintainer-clean-am: distclean-am maintainer-clean-generic \
maintainer-clean-local
mostlyclean: mostlyclean-am
mostlyclean-am: mostlyclean-generic mostlyclean-libtool
pdf: pdf-am
pdf-am:
ps: ps-am
ps-am:
uninstall-am:
.MAKE: install-am install-strip
.PHONY: all all-am all-local check check-am clean clean-generic \
clean-libtool cscopelist-am ctags-am distclean \
distclean-generic distclean-libtool distdir dvi dvi-am html \
html-am info info-am install install-am install-data \
install-data-am install-dvi install-dvi-am install-exec \
install-exec-am install-html install-html-am install-info \
install-info-am install-man install-pdf install-pdf-am \
install-ps install-ps-am install-strip installcheck \
installcheck-am installdirs maintainer-clean \
maintainer-clean-generic maintainer-clean-local mostlyclean \
mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
tags-am uninstall uninstall-am
.PRECIOUS: Makefile
all-local::
$(OCAMLBUILD) -classic-display -libs unix,nums $(GENFFT_NATIVE)
maintainer-clean-local::
$(OCAMLBUILD) -classic-display -clean
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
.NOEXPORT:

View File

@@ -0,0 +1,580 @@
(*
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*)
open Util
open Expr
let node_insert x = Assoctable.insert Expr.hash x
let node_lookup x = Assoctable.lookup Expr.hash (==) x
(*************************************************************
* Algebraic simplifier/elimination of common subexpressions
*************************************************************)
module AlgSimp : sig
val algsimp : expr list -> expr list
end = struct
open Monads.StateMonad
open Monads.MemoMonad
open Assoctable
let fetchSimp =
fetchState >>= fun (s, _) -> returnM s
let storeSimp s =
fetchState >>= (fun (_, c) -> storeState (s, c))
let lookupSimpM key =
fetchSimp >>= fun table ->
returnM (node_lookup key table)
let insertSimpM key value =
fetchSimp >>= fun table ->
storeSimp (node_insert key value table)
let subset a b =
List.for_all (fun x -> List.exists (fun y -> x == y) b) a
let structurallyEqualCSE a b =
match (a, b) with
| (Num a, Num b) -> Number.equal a b
| (NaN a, NaN b) -> a == b
| (Load a, Load b) -> Variable.same a b
| (Times (a, a'), Times (b, b')) ->
((a == b) && (a' == b')) ||
((a == b') && (a' == b))
| (CTimes (a, a'), CTimes (b, b')) ->
((a == b) && (a' == b')) ||
((a == b') && (a' == b))
| (CTimesJ (a, a'), CTimesJ (b, b')) -> ((a == b) && (a' == b'))
| (Plus a, Plus b) -> subset a b && subset b a
| (Uminus a, Uminus b) -> (a == b)
| _ -> false
let hashCSE x =
if (!Magic.randomized_cse) then
Oracle.hash x
else
Expr.hash x
let equalCSE a b =
if (!Magic.randomized_cse) then
(structurallyEqualCSE a b || Oracle.likely_equal a b)
else
structurallyEqualCSE a b
let fetchCSE =
fetchState >>= fun (_, c) -> returnM c
let storeCSE c =
fetchState >>= (fun (s, _) -> storeState (s, c))
let lookupCSEM key =
fetchCSE >>= fun table ->
returnM (Assoctable.lookup hashCSE equalCSE key table)
let insertCSEM key value =
fetchCSE >>= fun table ->
storeCSE (Assoctable.insert hashCSE key value table)
(* memoize both x and Uminus x (unless x is already negated) *)
let identityM x =
let memo x = memoizing lookupCSEM insertCSEM returnM x in
match x with
Uminus _ -> memo x
| _ -> memo x >>= fun x' -> memo (Uminus x') >> returnM x'
let makeNode = identityM
(* simplifiers for various kinds of nodes *)
let rec snumM = function
n when Number.is_zero n ->
makeNode (Num (Number.zero))
| n when Number.negative n ->
makeNode (Num (Number.negate n)) >>= suminusM
| n -> makeNode (Num n)
and suminusM = function
Uminus x -> makeNode x
| Num a when (Number.is_zero a) -> snumM Number.zero
| a -> makeNode (Uminus a)
and stimesM = function
| (Uminus a, b) -> stimesM (a, b) >>= suminusM
| (a, Uminus b) -> stimesM (a, b) >>= suminusM
| (NaN I, CTimes (a, b)) -> stimesM (NaN I, b) >>=
fun ib -> sctimesM (a, ib)
| (NaN I, CTimesJ (a, b)) -> stimesM (NaN I, b) >>=
fun ib -> sctimesjM (a, ib)
| (Num a, Num b) -> snumM (Number.mul a b)
| (Num a, Times (Num b, c)) ->
snumM (Number.mul a b) >>= fun x -> stimesM (x, c)
| (Num a, b) when Number.is_zero a -> snumM Number.zero
| (Num a, b) when Number.is_one a -> makeNode b
| (Num a, b) when Number.is_mone a -> suminusM b
| (a, b) when is_known_constant b && not (is_known_constant a) ->
stimesM (b, a)
| (a, b) -> makeNode (Times (a, b))
and sctimesM = function
| (Uminus a, b) -> sctimesM (a, b) >>= suminusM
| (a, Uminus b) -> sctimesM (a, b) >>= suminusM
| (a, b) -> makeNode (CTimes (a, b))
and sctimesjM = function
| (Uminus a, b) -> sctimesjM (a, b) >>= suminusM
| (a, Uminus b) -> sctimesjM (a, b) >>= suminusM
| (a, b) -> makeNode (CTimesJ (a, b))
and reduce_sumM x = match x with
[] -> returnM []
| [Num a] ->
if (Number.is_zero a) then
returnM []
else returnM x
| [Uminus (Num a)] ->
if (Number.is_zero a) then
returnM []
else returnM x
| (Num a) :: (Num b) :: s ->
snumM (Number.add a b) >>= fun x ->
reduce_sumM (x :: s)
| (Num a) :: (Uminus (Num b)) :: s ->
snumM (Number.sub a b) >>= fun x ->
reduce_sumM (x :: s)
| (Uminus (Num a)) :: (Num b) :: s ->
snumM (Number.sub b a) >>= fun x ->
reduce_sumM (x :: s)
| (Uminus (Num a)) :: (Uminus (Num b)) :: s ->
snumM (Number.add a b) >>=
suminusM >>= fun x ->
reduce_sumM (x :: s)
| ((Num _) as a) :: b :: s -> reduce_sumM (b :: a :: s)
| ((Uminus (Num _)) as a) :: b :: s -> reduce_sumM (b :: a :: s)
| a :: s ->
reduce_sumM s >>= fun s' -> returnM (a :: s')
and collectible1 = function
| NaN _ -> false
| Uminus x -> collectible1 x
| _ -> true
and collectible (a, b) = collectible1 a
(* collect common factors: ax + bx -> (a+b)x *)
and collectM which x =
let rec findCoeffM which = function
| Times (a, b) when collectible (which (a, b)) -> returnM (which (a, b))
| Uminus x ->
findCoeffM which x >>= fun (coeff, b) ->
suminusM coeff >>= fun mcoeff ->
returnM (mcoeff, b)
| x -> snumM Number.one >>= fun one -> returnM (one, x)
and separateM xpr = function
[] -> returnM ([], [])
| a :: b ->
separateM xpr b >>= fun (w, wo) ->
(* try first factor *)
findCoeffM (fun (a, b) -> (a, b)) a >>= fun (c, x) ->
if (xpr == x) && collectible (c, x) then returnM (c :: w, wo)
else
(* try second factor *)
findCoeffM (fun (a, b) -> (b, a)) a >>= fun (c, x) ->
if (xpr == x) && collectible (c, x) then returnM (c :: w, wo)
else returnM (w, a :: wo)
in match x with
[] -> returnM x
| [a] -> returnM x
| a :: b ->
findCoeffM which a >>= fun (_, xpr) ->
separateM xpr x >>= fun (w, wo) ->
collectM which wo >>= fun wo' ->
splusM w >>= fun w' ->
stimesM (w', xpr) >>= fun t' ->
returnM (t':: wo')
and mangleSumM x = returnM x
>>= reduce_sumM
>>= collectM (fun (a, b) -> (a, b))
>>= collectM (fun (a, b) -> (b, a))
>>= reduce_sumM
>>= deepCollectM !Magic.deep_collect_depth
>>= reduce_sumM
and reorder_uminus = function (* push all Uminuses to the end *)
[] -> []
| ((Uminus _) as a' :: b) -> (reorder_uminus b) @ [a']
| (a :: b) -> a :: (reorder_uminus b)
and canonicalizeM = function
[] -> snumM Number.zero
| [a] -> makeNode a (* one term *)
| a -> generateFusedMultAddM (reorder_uminus a)
and generateFusedMultAddM =
let rec is_multiplication = function
| Times (Num a, b) -> true
| Uminus (Times (Num a, b)) -> true
| _ -> false
and separate = function
[] -> ([], [], Number.zero)
| (Times (Num a, b)) as this :: c ->
let (x, y, max) = separate c in
let newmax = if (Number.greater a max) then a else max in
(this :: x, y, newmax)
| (Uminus (Times (Num a, b))) as this :: c ->
let (x, y, max) = separate c in
let newmax = if (Number.greater a max) then a else max in
(this :: x, y, newmax)
| this :: c ->
let (x, y, max) = separate c in
(x, this :: y, max)
in fun l ->
if !Magic.enable_fma && count is_multiplication l >= 2 then
let (w, wo, max) = separate l in
snumM (Number.div Number.one max) >>= fun invmax' ->
snumM max >>= fun max' ->
mapM (fun x -> stimesM (invmax', x)) w >>= splusM >>= fun pw' ->
stimesM (max', pw') >>= fun mw' ->
splusM (wo @ [mw'])
else
makeNode (Plus l)
and negative = function
Uminus _ -> true
| _ -> false
(*
* simplify patterns of the form
*
* ((c_1 * a + ...) + ...) + (c_2 * a + ...)
*
* The pattern includes arbitrary coefficients and minus signs.
* A common case of this pattern is the butterfly
* (a + b) + (a - b)
* (a + b) - (a - b)
*)
(* this whole procedure needs much more thought *)
and deepCollectM maxdepth l =
let rec findTerms depth x = match x with
| Uminus x -> findTerms depth x
| Times (Num _, b) -> (findTerms (depth - 1) b)
| Plus l when depth > 0 ->
x :: List.flatten (List.map (findTerms (depth - 1)) l)
| x -> [x]
and duplicates = function
[] -> []
| a :: b -> if List.memq a b then a :: duplicates b
else duplicates b
in let rec splitDuplicates depth d x =
if (List.memq x d) then
snumM (Number.zero) >>= fun zero ->
returnM (zero, x)
else match x with
| Times (a, b) ->
splitDuplicates (depth - 1) d a >>= fun (a', xa) ->
splitDuplicates (depth - 1) d b >>= fun (b', xb) ->
stimesM (a', b') >>= fun ab ->
stimesM (a, xb) >>= fun xb' ->
stimesM (xa, b) >>= fun xa' ->
stimesM (xa, xb) >>= fun xab ->
splusM [xa'; xb'; xab] >>= fun x ->
returnM (ab, x)
| Uminus a ->
splitDuplicates depth d a >>= fun (x, y) ->
suminusM x >>= fun ux ->
suminusM y >>= fun uy ->
returnM (ux, uy)
| Plus l when depth > 0 ->
mapM (splitDuplicates (depth - 1) d) l >>= fun ld ->
let (l', d') = List.split ld in
splusM l' >>= fun p ->
splusM d' >>= fun d'' ->
returnM (p, d'')
| x ->
snumM (Number.zero) >>= fun zero' ->
returnM (x, zero')
in let l' = List.flatten (List.map (findTerms maxdepth) l)
in match duplicates l' with
| [] -> returnM l
| d ->
mapM (splitDuplicates maxdepth d) l >>= fun ld ->
let (l', d') = List.split ld in
splusM l' >>= fun l'' ->
let rec flattenPlusM = function
| Plus l -> returnM l
| Uminus x ->
flattenPlusM x >>= mapM suminusM
| x -> returnM [x]
in
mapM flattenPlusM d' >>= fun d'' ->
splusM (List.flatten d'') >>= fun d''' ->
mangleSumM [l''; d''']
and splusM l =
let fma_heuristics x =
if !Magic.enable_fma then
match x with
| [Uminus (Times _); Times _] -> Some false
| [Times _; Uminus (Times _)] -> Some false
| [Uminus (_); Times _] -> Some true
| [Times _; Uminus (Plus _)] -> Some true
| [_; Uminus (Times _)] -> Some false
| [Uminus (Times _); _] -> Some false
| _ -> None
else
None
in
mangleSumM l >>= fun l' ->
(* no terms are negative. Don't do anything *)
if not (List.exists negative l') then
canonicalizeM l'
(* all terms are negative. Negate them all and collect the minus sign *)
else if List.for_all negative l' then
mapM suminusM l' >>= splusM >>= suminusM
else match fma_heuristics l' with
| Some true -> mapM suminusM l' >>= splusM >>= suminusM
| Some false -> canonicalizeM l'
| None ->
(* Ask the Oracle for the canonical form *)
if (not !Magic.randomized_cse) &&
Oracle.should_flip_sign (Plus l') then
mapM suminusM l' >>= splusM >>= suminusM
else
canonicalizeM l'
(* monadic style algebraic simplifier for the dag *)
let rec algsimpM x =
memoizing lookupSimpM insertSimpM
(function
| Num a -> snumM a
| NaN _ as x -> makeNode x
| Plus a ->
mapM algsimpM a >>= splusM
| Times (a, b) ->
(algsimpM a >>= fun a' ->
algsimpM b >>= fun b' ->
stimesM (a', b'))
| CTimes (a, b) ->
(algsimpM a >>= fun a' ->
algsimpM b >>= fun b' ->
sctimesM (a', b'))
| CTimesJ (a, b) ->
(algsimpM a >>= fun a' ->
algsimpM b >>= fun b' ->
sctimesjM (a', b'))
| Uminus a ->
algsimpM a >>= suminusM
| Store (v, a) ->
algsimpM a >>= fun a' ->
makeNode (Store (v, a'))
| Load _ as x -> makeNode x)
x
let initialTable = (empty, empty)
let simp_roots = mapM algsimpM
let algsimp = runM initialTable simp_roots
end
(*************************************************************
* Network transposition algorithm
*************************************************************)
module Transpose = struct
open Monads.StateMonad
open Monads.MemoMonad
open Littlesimp
let fetchDuals = fetchState
let storeDuals = storeState
let lookupDualsM key =
fetchDuals >>= fun table ->
returnM (node_lookup key table)
let insertDualsM key value =
fetchDuals >>= fun table ->
storeDuals (node_insert key value table)
let rec visit visited vtable parent_table = function
[] -> (visited, parent_table)
| node :: rest ->
match node_lookup node vtable with
| Some _ -> visit visited vtable parent_table rest
| None ->
let children = match node with
| Store (v, n) -> [n]
| Plus l -> l
| Times (a, b) -> [a; b]
| CTimes (a, b) -> [a; b]
| CTimesJ (a, b) -> [a; b]
| Uminus x -> [x]
| _ -> []
in let rec loop t = function
[] -> t
| a :: rest ->
(match node_lookup a t with
None -> loop (node_insert a [node] t) rest
| Some c -> loop (node_insert a (node :: c) t) rest)
in
(visit
(node :: visited)
(node_insert node () vtable)
(loop parent_table children)
(children @ rest))
let make_transposer parent_table =
let rec termM node candidate_parent =
match candidate_parent with
| Store (_, n) when n == node ->
dualM candidate_parent >>= fun x' -> returnM [x']
| Plus (l) when List.memq node l ->
dualM candidate_parent >>= fun x' -> returnM [x']
| Times (a, b) when b == node ->
dualM candidate_parent >>= fun x' ->
returnM [makeTimes (a, x')]
| CTimes (a, b) when b == node ->
dualM candidate_parent >>= fun x' ->
returnM [CTimes (a, x')]
| CTimesJ (a, b) when b == node ->
dualM candidate_parent >>= fun x' ->
returnM [CTimesJ (a, x')]
| Uminus n when n == node ->
dualM candidate_parent >>= fun x' ->
returnM [makeUminus x']
| _ -> returnM []
and dualExpressionM this_node =
mapM (termM this_node)
(match node_lookup this_node parent_table with
| Some a -> a
| None -> failwith "bug in dualExpressionM"
) >>= fun l ->
returnM (makePlus (List.flatten l))
and dualM this_node =
memoizing lookupDualsM insertDualsM
(function
| Load v as x ->
if (Variable.is_constant v) then
returnM (Load v)
else
(dualExpressionM x >>= fun d ->
returnM (Store (v, d)))
| Store (v, x) -> returnM (Load v)
| x -> dualExpressionM x)
this_node
in dualM
let is_store = function
| Store _ -> true
| _ -> false
let transpose dag =
let _ = Util.info "begin transpose" in
let (all_nodes, parent_table) =
visit [] Assoctable.empty Assoctable.empty dag in
let transposerM = make_transposer parent_table in
let mapTransposerM = mapM transposerM in
let duals = runM Assoctable.empty mapTransposerM all_nodes in
let roots = List.filter is_store duals in
let _ = Util.info "end transpose" in
roots
end
(*************************************************************
* Various dag statistics
*************************************************************)
module Stats : sig
type complexity
val complexity : Expr.expr list -> complexity
val same_complexity : complexity -> complexity -> bool
val leq_complexity : complexity -> complexity -> bool
val to_string : complexity -> string
end = struct
type complexity = int * int * int * int * int * int
let rec visit visited vtable = function
[] -> visited
| node :: rest ->
match node_lookup node vtable with
Some _ -> visit visited vtable rest
| None ->
let children = match node with
Store (v, n) -> [n]
| Plus l -> l
| Times (a, b) -> [a; b]
| Uminus x -> [x]
| _ -> []
in visit (node :: visited)
(node_insert node () vtable)
(children @ rest)
let complexity dag =
let rec loop (load, store, plus, times, uminus, num) = function
[] -> (load, store, plus, times, uminus, num)
| node :: rest ->
loop
(match node with
| Load _ -> (load + 1, store, plus, times, uminus, num)
| Store _ -> (load, store + 1, plus, times, uminus, num)
| Plus x -> (load, store, plus + (List.length x - 1), times, uminus, num)
| Times _ -> (load, store, plus, times + 1, uminus, num)
| Uminus _ -> (load, store, plus, times, uminus + 1, num)
| Num _ -> (load, store, plus, times, uminus, num + 1)
| CTimes _ -> (load, store, plus, times, uminus, num)
| CTimesJ _ -> (load, store, plus, times, uminus, num)
| NaN _ -> (load, store, plus, times, uminus, num))
rest
in let (l, s, p, t, u, n) =
loop (0, 0, 0, 0, 0, 0) (visit [] Assoctable.empty dag)
in (l, s, p, t, u, n)
let weight (l, s, p, t, u, n) =
l + s + 10 * p + 20 * t + u + n
let same_complexity a b = weight a = weight b
let leq_complexity a b = weight a <= weight b
let to_string (l, s, p, t, u, n) =
Printf.sprintf "ld=%d st=%d add=%d mul=%d uminus=%d num=%d\n"
l s p t u n
end
(* simplify the dag *)
let algsimp v =
let rec simplification_loop v =
let () = Util.info "simplification step" in
let complexity = Stats.complexity v in
let () = Util.info ("complexity = " ^ (Stats.to_string complexity)) in
let v = (AlgSimp.algsimp @@ Transpose.transpose @@
AlgSimp.algsimp @@ Transpose.transpose) v in
let complexity' = Stats.complexity v in
let () = Util.info ("complexity = " ^ (Stats.to_string complexity')) in
if (Stats.leq_complexity complexity' complexity) then
let () = Util.info "end algsimp" in
v
else
simplification_loop v
in
let () = Util.info "begin algsimp" in
let v = AlgSimp.algsimp v in
if !Magic.network_transposition then simplification_loop v else v

View File

@@ -0,0 +1,22 @@
(*
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*)
val algsimp : Expr.expr list -> Expr.expr list

View File

@@ -0,0 +1,361 @@
(*
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*)
(* Here, we take a schedule (produced by schedule.ml) ordering a
sequence of instructions, and produce an annotated schedule. The
annotated schedule has the same ordering as the original schedule,
but is additionally partitioned into nested blocks of temporary
variables. The partitioning is computed via a heuristic algorithm.
The blocking allows the C code that we generate to consist of
nested blocks that help communicate variable lifetimes to the
compiler. *)
open Schedule
open Expr
open Variable
type annotated_schedule =
Annotate of variable list * variable list * variable list * int * aschedule
and aschedule =
ADone
| AInstr of assignment
| ASeq of (annotated_schedule * annotated_schedule)
let addelem a set = if not (List.memq a set) then a :: set else set
let union l =
let f x = addelem x (* let is source of polymorphism *)
in List.fold_right f l
(* set difference a - b *)
let diff a b = List.filter (fun x -> not (List.memq x b)) a
let rec minimize f = function
[] -> failwith "minimize"
| [n] -> n
| n :: rest ->
let x = minimize f rest in
if (f x) >= (f n) then n else x
(* find all variables used inside a scheduling unit *)
let rec find_block_vars = function
Done -> []
| (Instr (Assign (v, x))) -> v :: (find_vars x)
| Par a -> List.flatten (List.map find_block_vars a)
| Seq (a, b) -> (find_block_vars a) @ (find_block_vars b)
let uniq l =
List.fold_right (fun a b -> if List.memq a b then b else a :: b) l []
let has_related x = List.exists (Variable.same_class x)
let rec overlap a b = Util.count (fun y -> has_related y b) a
(* reorder a list of schedules so as to maximize overlap of variables *)
let reorder l =
let rec loop = function
[] -> []
| (a, va) :: b ->
let c =
List.map
(fun (a, x) -> ((a, x), (overlap va x, List.length x))) b in
let c' =
List.sort
(fun (_, (a, la)) (_, (b, lb)) ->
if la < lb || a > b then -1 else 1)
c in
let b' = List.map (fun (a, _) -> a) c' in
a :: (loop b') in
let l' = List.map (fun x -> x, uniq (find_block_vars x)) l in
(* start with smallest block --- does this matter ? *)
match l' with
[] -> []
| _ ->
let m = minimize (fun (_, x) -> (List.length x)) l' in
let l'' = Util.remove m l' in
loop (m :: l'')
(* remove Par blocks *)
let rec linearize = function
| Seq (a, Done) -> linearize a
| Seq (Done, a) -> linearize a
| Seq (a, b) -> Seq (linearize a, linearize b)
(* try to balance nested Par blocks *)
| Par [a] -> linearize a
| Par l ->
let n2 = (List.length l) / 2 in
let rec loop n a b =
if n = 0 then
(List.rev b, a)
else
match a with
[] -> failwith "loop"
| x :: y -> loop (n - 1) y (x :: b)
in let (a, b) = loop n2 (reorder l) []
in linearize (Seq (Par a, Par b))
| x -> x
let subset a b =
List.for_all (fun x -> List.exists (fun y -> x == y) b) a
let use_same_vars (Assign (av, ax)) (Assign (bv, bx)) =
is_temporary av &&
is_temporary bv &&
(let va = Expr.find_vars ax and vb = Expr.find_vars bx in
subset va vb && subset vb va)
let store_to_same_class (Assign (av, ax)) (Assign (bv, bx)) =
is_locative av &&
is_locative bv &&
Variable.same_class av bv
let loads_from_same_class (Assign (av, ax)) (Assign (bv, bx)) =
match (ax, bx) with
| (Load a), (Load b) when
Variable.is_locative a && Variable.is_locative b
-> Variable.same_class a b
| _ -> false
(* extract instructions from schedule *)
let rec sched_to_ilist = function
| Done -> []
| Instr a -> [a]
| Seq (a, b) -> (sched_to_ilist a) @ (sched_to_ilist b)
| _ -> failwith "sched_to_ilist" (* Par blocks removed by linearize *)
let rec find_friends friendp insn friends foes = function
| [] -> (friends, foes)
| a :: b ->
if (a == insn) || (friendp a insn) then
find_friends friendp insn (a :: friends) foes b
else
find_friends friendp insn friends (a :: foes) b
(* schedule all instructions in the equivalence class determined
by friendp at the point where the last one
is executed *)
let rec delay_friends friendp sched =
let rec recur insns = function
| Done -> (Done, insns)
| Instr a ->
let (friends, foes) = find_friends friendp a [] [] insns in
(Schedule.sequentially friends), foes
| Seq (a, b) ->
let (b', insnsb) = recur insns b in
let (a', insnsa) = recur insnsb a in
(Seq (a', b')), insnsa
| _ -> failwith "delay_friends"
in match recur (sched_to_ilist sched) sched with
| (s, []) -> s (* assert that all insns have been used *)
| _ -> failwith "delay_friends"
(* schedule all instructions in the equivalence class determined
by friendp at the point where the first one
is executed *)
let rec anticipate_friends friendp sched =
let rec recur insns = function
| Done -> (Done, insns)
| Instr a ->
let (friends, foes) = find_friends friendp a [] [] insns in
(Schedule.sequentially friends), foes
| Seq (a, b) ->
let (a', insnsa) = recur insns a in
let (b', insnsb) = recur insnsa b in
(Seq (a', b')), insnsb
| _ -> failwith "anticipate_friends"
in match recur (sched_to_ilist sched) sched with
| (s, []) -> s (* assert that all insns have been used *)
| _ -> failwith "anticipate_friends"
let collect_buddy_stores buddy_list sched =
let rec recur sched delayed_stores = match sched with
| Done -> (sched, delayed_stores)
| Instr (Assign (v, x)) ->
begin
try
let buddies = List.find (List.memq v) buddy_list in
let tmp = Variable.make_temporary () in
let i = Seq(Instr (Assign (tmp, x)),
Instr (Assign (v, Times (NaN MULTI_A, Load tmp))))
and delayed_stores = (v, Load tmp) :: delayed_stores in
try
(Seq (i,
Instr (Assign
(List.hd buddies,
Times (NaN MULTI_B,
Plus (List.map
(fun buddy ->
List.assq buddy
delayed_stores)
buddies))) )))
, delayed_stores
with Not_found -> (i, delayed_stores)
with Not_found -> (sched, delayed_stores)
end
| Seq (a, b) ->
let (newa, delayed_stores) = recur a delayed_stores in
let (newb, delayed_stores) = recur b delayed_stores in
(Seq (newa, newb), delayed_stores)
| _ -> failwith "collect_buddy_stores"
in let (sched, _) = recur sched [] in
sched
let schedule_for_pipeline sched =
let update_readytimes t (Assign (v, _)) ready_times =
(v, (t + !Magic.pipeline_latency)) :: ready_times
and readyp t ready_times (Assign (_, x)) =
List.for_all
(fun var ->
try
(List.assq var ready_times) <= t
with Not_found -> false)
(List.filter Variable.is_temporary (Expr.find_vars x))
in
let rec recur sched t ready_times delayed_instructions =
let (ready, not_ready) =
List.partition (readyp t ready_times) delayed_instructions
in match ready with
| a :: b ->
let (sched, t, ready_times, delayed_instructions) =
recur sched (t+1) (update_readytimes t a ready_times)
(b @ not_ready)
in
(Seq (Instr a, sched)), t, ready_times, delayed_instructions
| _ -> (match sched with
| Done -> (sched, t, ready_times, delayed_instructions)
| Instr a ->
if (readyp t ready_times a) then
(sched, (t+1), (update_readytimes t a ready_times),
delayed_instructions)
else
(Done, t, ready_times, (a :: delayed_instructions))
| Seq (a, b) ->
let (a, t, ready_times, delayed_instructions) =
recur a t ready_times delayed_instructions
in
let (b, t, ready_times, delayed_instructions) =
recur b t ready_times delayed_instructions
in (Seq (a, b)), t, ready_times, delayed_instructions
| _ -> failwith "schedule_for_pipeline")
in let rec recur_until_done sched t ready_times delayed_instructions =
let (sched, t, ready_times, delayed_instructions) =
recur sched t ready_times delayed_instructions
in match delayed_instructions with
| [] -> sched
| _ ->
(Seq (sched,
(recur_until_done Done (t+1) ready_times
delayed_instructions)))
in recur_until_done sched 0 [] []
let rec rewrite_declarations force_declarations
(Annotate (_, _, declared, _, what)) =
let m = !Magic.number_of_variables in
let declare_it declared =
if (force_declarations || List.length declared >= m) then
([], declared)
else
(declared, [])
in match what with
ADone -> Annotate ([], [], [], 0, what)
| AInstr i ->
let (u, d) = declare_it declared
in Annotate ([], u, d, 0, what)
| ASeq (a, b) ->
let ma = rewrite_declarations false a
and mb = rewrite_declarations false b
in let Annotate (_, ua, _, _, _) = ma
and Annotate (_, ub, _, _, _) = mb
in let (u, d) = declare_it (declared @ ua @ ub)
in Annotate ([], u, d, 0, ASeq (ma, mb))
let annotate list_of_buddy_stores schedule =
let rec analyze live_at_end = function
Done -> Annotate (live_at_end, [], [], 0, ADone)
| Instr i -> (match i with
Assign (v, x) ->
let vars = (find_vars x) in
Annotate (Util.remove v (union live_at_end vars), [v], [],
0, AInstr i))
| Seq (a, b) ->
let ab = analyze live_at_end b in
let Annotate (live_at_begin_b, defined_b, _, depth_a, _) = ab in
let aa = analyze live_at_begin_b a in
let Annotate (live_at_begin_a, defined_a, _, depth_b, _) = aa in
let defined = List.filter is_temporary (defined_a @ defined_b) in
let declarable = diff defined live_at_end in
let undeclarable = diff defined declarable
and maxdepth = max depth_a depth_b in
Annotate (live_at_begin_a, undeclarable, declarable,
List.length declarable + maxdepth,
ASeq (aa, ab))
| _ -> failwith "really_analyze"
in
let () = Util.info "begin annotate" in
let x = linearize schedule in
let x =
if (!Magic.schedule_for_pipeline && !Magic.pipeline_latency > 0) then
schedule_for_pipeline x
else
x
in
let x =
if !Magic.reorder_insns then
linearize(anticipate_friends use_same_vars x)
else
x
in
(* delay stores to the real and imaginary parts of the same number *)
let x =
if !Magic.reorder_stores then
linearize(delay_friends store_to_same_class x)
else
x
in
(* move loads of the real and imaginary parts of the same number *)
let x =
if !Magic.reorder_loads then
linearize(anticipate_friends loads_from_same_class x)
else
x
in
let x = collect_buddy_stores list_of_buddy_stores x in
let x = analyze [] x in
let res = rewrite_declarations true x in
let () = Util.info "end annotate" in
res
let rec dump print (Annotate (_, _, _, _, code)) =
dump_code print code
and dump_code print = function
| ADone -> ()
| AInstr x -> print ((assignment_to_string x) ^ "\n")
| ASeq (a, b) -> dump print a; dump print b

View File

@@ -0,0 +1,36 @@
(*
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*)
open Variable
open Expr
type annotated_schedule =
Annotate of variable list * variable list * variable list *
int * aschedule
and aschedule =
ADone
| AInstr of assignment
| ASeq of (annotated_schedule * annotated_schedule)
val annotate :
variable list list -> Schedule.schedule -> annotated_schedule
val dump : (string -> unit) -> annotated_schedule -> unit

View File

@@ -0,0 +1,65 @@
(*
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*)
(*************************************************************
* Functional associative table
*************************************************************)
(*
* this module implements a functional associative table.
* The table is parametrized by an equality predicate and
* a hash function, with the restriction that (equal a b) ==>
* hash a == hash b.
* The table is purely functional and implemented using a binary
* search tree (not balanced for now)
*)
type ('a, 'b) elem =
Leaf
| Node of int * ('a, 'b) elem * ('a, 'b) elem * ('a * 'b) list
let empty = Leaf
let lookup hash equal key table =
let h = hash key in
let rec look = function
Leaf -> None
| Node (hash_key, left, right, this_list) ->
if (hash_key < h) then look left
else if (hash_key > h) then look right
else let rec loop = function
[] -> None
| (a, b) :: rest -> if (equal key a) then Some b else loop rest
in loop this_list
in look table
let insert hash key value table =
let h = hash key in
let rec ins = function
Leaf -> Node (h, Leaf, Leaf, [(key, value)])
| Node (hash_key, left, right, this_list) ->
if (hash_key < h) then
Node (hash_key, ins left, right, this_list)
else if (hash_key > h) then
Node (hash_key, left, ins right, this_list)
else
Node (hash_key, left, right, (key, value) :: this_list)
in ins table

View File

@@ -0,0 +1,29 @@
(*
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*)
type ('a, 'b) elem =
| Leaf
| Node of int * ('a, 'b) elem * ('a, 'b) elem * ('a * 'b) list
val empty : ('a, 'b) elem
val lookup :
('a -> int) -> ('a -> 'b -> bool) -> 'a -> ('b, 'c) elem -> 'c option
val insert :
('a -> int) -> 'a -> 'c -> ('a, 'c) elem -> ('a, 'c) elem

461
fftw-3.3.10/genfft/c.ml Normal file
View File

@@ -0,0 +1,461 @@
(*
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*)
(*
* This module contains the definition of a C-like abstract
* syntax tree, and functions to convert ML values into C
* programs
*)
open Expr
open Annotate
open List
let realtype = "R"
let realtypep = realtype ^ " *"
let extended_realtype = "E"
let constrealtype = "const " ^ realtype
let constrealtypep = constrealtype ^ " *"
let stridetype = "stride"
(***********************************
* C program structure
***********************************)
type c_decl =
| Decl of string * string
| Tdecl of string (* arbitrary text declaration *)
and c_ast =
| Asch of annotated_schedule
| Simd_leavefun
| Return of c_ast
| For of c_ast * c_ast * c_ast * c_ast
| If of c_ast * c_ast
| Block of (c_decl list) * (c_ast list)
| Binop of string * c_ast * c_ast
| Expr_assign of c_ast * c_ast
| Stmt_assign of c_ast * c_ast
| Comma of c_ast * c_ast
| Integer of int
| CVar of string
| CCall of string * c_ast
| CPlus of c_ast list
| ITimes of c_ast * c_ast
| CUminus of c_ast
and c_fcn = Fcn of string * string * (c_decl list) * c_ast
let ctimes = function
| (Integer 1), a -> a
| a, (Integer 1) -> a
| a, b -> ITimes (a, b)
(*
* C AST unparser
*)
let foldr_string_concat l = fold_right (^) l ""
let rec unparse_expr_c =
let yes x = x and no x = "" in
let rec unparse_plus maybe =
let maybep = maybe " + " in
function
| [] -> ""
| (Uminus (Times (a, b))) :: (Uminus c) :: d ->
maybep ^ (op "FNMA" a b c) ^ (unparse_plus yes d)
| (Uminus c) :: (Uminus (Times (a, b))) :: d ->
maybep ^ (op "FNMA" a b c) ^ (unparse_plus yes d)
| (Uminus (Times (a, b))) :: c :: d ->
maybep ^ (op "FNMS" a b c) ^ (unparse_plus yes d)
| c :: (Uminus (Times (a, b))) :: d ->
maybep ^ (op "FNMS" a b c) ^ (unparse_plus yes d)
| (Times (a, b)) :: (Uminus c) :: d ->
maybep ^ (op "FMS" a b c) ^ (unparse_plus yes d)
| (Uminus c) :: (Times (a, b)) :: d ->
maybep ^ (op "FMS" a b c) ^ (unparse_plus yes d)
| (Times (a, b)) :: c :: d ->
maybep ^ (op "FMA" a b c) ^ (unparse_plus yes d)
| c :: (Times (a, b)) :: d ->
maybep ^ (op "FMA" a b c) ^ (unparse_plus yes d)
| (Uminus a :: b) ->
" - " ^ (parenthesize a) ^ (unparse_plus yes b)
| (a :: b) ->
maybep ^ (parenthesize a) ^ (unparse_plus yes b)
and parenthesize x = match x with
| (Load _) -> unparse_expr_c x
| (Num _) -> unparse_expr_c x
| _ -> "(" ^ (unparse_expr_c x) ^ ")"
and op nam a b c =
nam ^ "(" ^ (unparse_expr_c a) ^ ", " ^ (unparse_expr_c b) ^ ", " ^
(unparse_expr_c c) ^ ")"
in function
| Load v -> Variable.unparse v
| Num n -> Number.to_konst n
| Plus [] -> "0.0 /* bug */"
| Plus [a] -> " /* bug */ " ^ (unparse_expr_c a)
| Plus a -> (unparse_plus no a)
| Times (a, b) -> (parenthesize a) ^ " * " ^ (parenthesize b)
| Uminus (Plus [a; Uminus b]) -> unparse_plus no [b; Uminus a]
| Uminus a -> "- " ^ (parenthesize a)
| _ -> failwith "unparse_expr_c"
and unparse_expr_generic =
let rec u x = unparse_expr_generic x
and unary op a = Printf.sprintf "%s(%s)" op (u a)
and binary op a b = Printf.sprintf "%s(%s, %s)" op (u a) (u b)
and ternary op a b c = Printf.sprintf "%s(%s, %s, %s)" op (u a) (u b) (u c)
and quaternary op a b c d =
Printf.sprintf "%s(%s, %s, %s, %s)" op (u a) (u b) (u c) (u d)
and unparse_plus = function
| [(Uminus (Times (a, b))); Times (c, d)] -> quaternary "FNMMS" a b c d
| [Times (c, d); (Uminus (Times (a, b)))] -> quaternary "FNMMS" a b c d
| [Times (c, d); (Times (a, b))] -> quaternary "FMMA" a b c d
| [(Uminus (Times (a, b))); c] -> ternary "FNMS" a b c
| [c; (Uminus (Times (a, b)))] -> ternary "FNMS" a b c
| [(Uminus c); (Times (a, b))] -> ternary "FMS" a b c
| [(Times (a, b)); (Uminus c)] -> ternary "FMS" a b c
| [c; (Times (a, b))] -> ternary "FMA" a b c
| [(Times (a, b)); c] -> ternary "FMA" a b c
| [a; Uminus b] -> binary "SUB" a b
| [a; b] -> binary "ADD" a b
| a :: b :: c -> binary "ADD" a (Plus (b :: c))
| _ -> failwith "unparse_plus"
in function
| Load v -> Variable.unparse v
| Num n -> Number.to_konst n
| Plus a -> unparse_plus a
| Times (a, b) -> binary "MUL" a b
| Uminus a -> unary "NEG" a
| _ -> failwith "unparse_expr"
and unparse_expr x =
if !Magic.generic_arith then
unparse_expr_generic x
else
unparse_expr_c x
and unparse_assignment (Assign (v, x)) =
(Variable.unparse v) ^ " = " ^ (unparse_expr x) ^ ";\n"
and unparse_annotated force_bracket =
let rec unparse_code = function
ADone -> ""
| AInstr i -> unparse_assignment i
| ASeq (a, b) ->
(unparse_annotated false a) ^ (unparse_annotated false b)
and declare_variables l =
let rec uvar = function
[] -> failwith "uvar"
| [v] -> (Variable.unparse v) ^ ";\n"
| a :: b -> (Variable.unparse a) ^ ", " ^ (uvar b)
in let rec vvar l =
let s = if !Magic.compact then 15 else 1 in
if (List.length l <= s) then
match l with
[] -> ""
| _ -> extended_realtype ^ " " ^ (uvar l)
else
(vvar (Util.take s l)) ^ (vvar (Util.drop s l))
in vvar (List.filter Variable.is_temporary l)
in function
Annotate (_, _, decl, _, code) ->
if (not force_bracket) && (Util.null decl) then
unparse_code code
else "{\n" ^
(declare_variables decl) ^
(unparse_code code) ^
"}\n"
and unparse_decl = function
| Decl (a, b) -> a ^ " " ^ b ^ ";\n"
| Tdecl x -> x
and unparse_ast =
let rec unparse_plus = function
| [] -> ""
| (CUminus a :: b) -> " - " ^ (parenthesize a) ^ (unparse_plus b)
| (a :: b) -> " + " ^ (parenthesize a) ^ (unparse_plus b)
and parenthesize x = match x with
| (CVar _) -> unparse_ast x
| (CCall _) -> unparse_ast x
| (Integer _) -> unparse_ast x
| _ -> "(" ^ (unparse_ast x) ^ ")"
in
function
| Asch a -> (unparse_annotated true a)
| Simd_leavefun -> "" (* used only in SIMD code *)
| Return x -> "return " ^ unparse_ast x ^ ";"
| For (a, b, c, d) ->
"for (" ^
unparse_ast a ^ "; " ^ unparse_ast b ^ "; " ^ unparse_ast c
^ ")" ^ unparse_ast d
| If (a, d) ->
"if (" ^
unparse_ast a
^ ")" ^ unparse_ast d
| Block (d, s) ->
if (s == []) then ""
else
"{\n" ^
foldr_string_concat (map unparse_decl d) ^
foldr_string_concat (map unparse_ast s) ^
"}\n"
| Binop (op, a, b) -> (unparse_ast a) ^ op ^ (unparse_ast b)
| Expr_assign (a, b) -> (unparse_ast a) ^ " = " ^ (unparse_ast b)
| Stmt_assign (a, b) -> (unparse_ast a) ^ " = " ^ (unparse_ast b) ^ ";\n"
| Comma (a, b) -> (unparse_ast a) ^ ", " ^ (unparse_ast b)
| Integer i -> string_of_int i
| CVar s -> s
| CCall (s, x) -> s ^ "(" ^ (unparse_ast x) ^ ")"
| CPlus [] -> "0 /* bug */"
| CPlus [a] -> " /* bug */ " ^ (unparse_ast a)
| CPlus (a::b) -> (parenthesize a) ^ (unparse_plus b)
| ITimes (a, b) -> (parenthesize a) ^ " * " ^ (parenthesize b)
| CUminus a -> "- " ^ (parenthesize a)
and unparse_function = function
Fcn (typ, name, args, body) ->
let rec unparse_args = function
[Decl (a, b)] -> a ^ " " ^ b
| (Decl (a, b)) :: s -> a ^ " " ^ b ^ ", "
^ unparse_args s
| [] -> ""
| _ -> failwith "unparse_function"
in
(typ ^ " " ^ name ^ "(" ^ unparse_args args ^ ")\n" ^
unparse_ast body)
(*************************************************************
* traverse a a function and return a list of all expressions,
* in the execution order
**************************************************************)
let rec fcn_to_expr_list = fun (Fcn (_, _, _, body)) -> ast_to_expr_list body
and acode_to_expr_list = function
AInstr (Assign (_, x)) -> [x]
| ASeq (a, b) ->
(asched_to_expr_list a) @ (asched_to_expr_list b)
| _ -> []
and asched_to_expr_list (Annotate (_, _, _, _, code)) =
acode_to_expr_list code
and ast_to_expr_list = function
Asch a -> asched_to_expr_list a
| Block (_, a) -> flatten (map ast_to_expr_list a)
| For (_, _, _, body) -> ast_to_expr_list body
| If (_, body) -> ast_to_expr_list body
| _ -> []
(***********************
* Extracting Constants
***********************)
(* add a new key & value to a list of (key,value) pairs, where
the keys are floats and each key is unique up to almost_equal *)
let extract_constants f =
let constlist = flatten (map expr_to_constants (ast_to_expr_list f))
in map
(fun n ->
Tdecl
("DK(" ^ (Number.to_konst n) ^ ", " ^ (Number.to_string n) ^
");\n"))
(unique_constants constlist)
(******************************
Extracting operation counts
******************************)
let count_stack_vars =
let rec count_acode = function
| ASeq (a, b) -> max (count_asched a) (count_asched b)
| _ -> 0
and count_asched (Annotate (_, _, decl, _, code)) =
(length decl) + (count_acode code)
and count_ast = function
| Asch a -> count_asched a
| Block (d, a) -> (length d) + (Util.max_list (map count_ast a))
| For (_, _, _, body) -> count_ast body
| If (_, body) -> count_ast body
| _ -> 0
in function (Fcn (_, _, _, body)) -> count_ast body
let count_memory_acc f =
let rec count_var v =
if (Variable.is_locative v) then 1 else 0
and count_acode = function
| AInstr (Assign (v, _)) -> count_var v
| ASeq (a, b) -> (count_asched a) + (count_asched b)
| _ -> 0
and count_asched = function
Annotate (_, _, _, _, code) -> count_acode code
and count_ast = function
| Asch a -> count_asched a
| Block (_, a) -> (Util.sum_list (map count_ast a))
| Comma (a, b) -> (count_ast a) + (count_ast b)
| For (_, _, _, body) -> count_ast body
| If (_, body) -> count_ast body
| _ -> 0
and count_acc_expr_func acc = function
| Load v -> acc + (count_var v)
| Plus a -> fold_left count_acc_expr_func acc a
| Times (a, b) -> fold_left count_acc_expr_func acc [a; b]
| Uminus a -> count_acc_expr_func acc a
| _ -> acc
in let (Fcn (typ, name, args, body)) = f
in (count_ast body) +
fold_left count_acc_expr_func 0 (fcn_to_expr_list f)
let good_for_fma = To_alist.good_for_fma
let build_fma = function
| [a; Times (b, c)] when good_for_fma (b, c) -> Some (a, b, c)
| [Times (b, c); a] when good_for_fma (b, c) -> Some (a, b, c)
| [a; Uminus (Times (b, c))] when good_for_fma (b, c) -> Some (a, b, c)
| [Uminus (Times (b, c)); a] when good_for_fma (b, c) -> Some (a, b, c)
| _ -> None
let rec count_flops_expr_func (adds, mults, fmas) = function
| Plus [] -> (adds, mults, fmas)
| Plus ([_; _] as a) ->
begin
match build_fma a with
| None ->
fold_left count_flops_expr_func
(adds + (length a) - 1, mults, fmas) a
| Some (a, b, c) ->
fold_left count_flops_expr_func (adds, mults, fmas+1) [a; b; c]
end
| Plus (a :: b) ->
count_flops_expr_func (adds, mults, fmas) (Plus [a; Plus b])
| Times (NaN MULTI_A,_) -> (adds, mults, fmas)
| Times (NaN MULTI_B,_) -> (adds, mults, fmas)
| Times (NaN I,b) -> count_flops_expr_func (adds, mults, fmas) b
| Times (NaN CONJ,b) -> count_flops_expr_func (adds, mults, fmas) b
| Times (a,b) -> fold_left count_flops_expr_func (adds, mults+1, fmas) [a; b]
| CTimes (a,b) ->
fold_left count_flops_expr_func (adds+1, mults+2, fmas) [a; b]
| CTimesJ (a,b) ->
fold_left count_flops_expr_func (adds+1, mults+2, fmas) [a; b]
| Uminus a -> count_flops_expr_func (adds, mults, fmas) a
| _ -> (adds, mults, fmas)
let count_flops f =
fold_left count_flops_expr_func (0, 0, 0) (fcn_to_expr_list f)
let count_constants f =
length (unique_constants (flatten (map expr_to_constants (fcn_to_expr_list f))))
let arith_complexity f =
let (a, m, fmas) = count_flops f
and v = count_stack_vars f
and c = count_constants f
and mem = count_memory_acc f
in (a, m, fmas, v, c, mem)
(* print the operation costs *)
let print_cost f =
let Fcn (_, _, _, _) = f
and (a, m, fmas, v, c, mem) = arith_complexity f
in
"/*\n"^
" * This function contains " ^
(string_of_int (a + fmas)) ^ " FP additions, " ^
(string_of_int (m + fmas)) ^ " FP multiplications,\n" ^
" * (or, " ^
(string_of_int a) ^ " additions, " ^
(string_of_int m) ^ " multiplications, " ^
(string_of_int fmas) ^ " fused multiply/add),\n" ^
" * " ^ (string_of_int v) ^ " stack variables, " ^
(string_of_int c) ^ " constants, and " ^
(string_of_int mem) ^ " memory accesses\n" ^
" */\n"
(*****************************************
* functions that create C arrays
*****************************************)
type stride =
| SVar of string
| SConst of string
| SInteger of int
| SNeg of stride
type sstride =
| Simple of int
| Constant of (string * int)
| Composite of (string * int)
| Negative of sstride
let rec simplify_stride stride i =
match (stride, i) with
(_, 0) -> Simple 0
| (SInteger n, i) -> Simple (n * i)
| (SConst s, i) -> Constant (s, i)
| (SVar s, i) -> Composite (s, i)
| (SNeg x, i) ->
match (simplify_stride x i) with
| Negative y -> y
| y -> Negative y
let rec cstride_to_string = function
| Simple i -> string_of_int i
| Constant (s, i) ->
if !Magic.lisp_syntax then
"(* " ^ s ^ " " ^ (string_of_int i) ^ ")"
else
s ^ " * " ^ (string_of_int i)
| Composite (s, i) ->
if !Magic.lisp_syntax then
"(* " ^ s ^ " " ^ (string_of_int i) ^ ")"
else
"WS(" ^ s ^ ", " ^ (string_of_int i) ^ ")"
| Negative x -> "-" ^ cstride_to_string x
let aref name index =
if !Magic.lisp_syntax then
Printf.sprintf "(aref %s %s)" name index
else
Printf.sprintf "%s[%s]" name index
let array_subscript name stride k =
aref name (cstride_to_string (simplify_stride stride k))
let varray_subscript name vstride stride v i =
let vindex = simplify_stride vstride v
and iindex = simplify_stride stride i
in
let index =
match (vindex, iindex) with
(Simple vi, Simple ii) -> string_of_int (vi + ii)
| (Simple 0, x) -> cstride_to_string x
| (x, Simple 0) -> cstride_to_string x
| _ -> (cstride_to_string vindex) ^ " + " ^ (cstride_to_string iindex)
in aref name index
let real_of s = "c_re(" ^ s ^ ")"
let imag_of s = "c_im(" ^ s ^ ")"
let flops_of f =
let (add, mul, fma) = count_flops f in
Printf.sprintf "{ %d, %d, %d, 0 }" add mul fma

74
fftw-3.3.10/genfft/c.mli Normal file
View File

@@ -0,0 +1,74 @@
(*
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*)
type stride =
| SVar of string
| SConst of string
| SInteger of int
| SNeg of stride
val array_subscript : string -> stride -> int -> string
val varray_subscript : string -> stride -> stride -> int -> int -> string
val real_of : string -> string
val imag_of : string -> string
val realtype : string
val realtypep : string
val constrealtype : string
val constrealtypep : string
val stridetype : string
type c_decl =
| Decl of string * string
| Tdecl of string (* arbitrary text declaration *)
and c_ast =
| Asch of Annotate.annotated_schedule
| Simd_leavefun
| Return of c_ast
| For of c_ast * c_ast * c_ast * c_ast
| If of c_ast * c_ast
| Block of (c_decl list) * (c_ast list)
| Binop of string * c_ast * c_ast
| Expr_assign of c_ast * c_ast
| Stmt_assign of c_ast * c_ast
| Comma of c_ast * c_ast
| Integer of int
| CVar of string
| CCall of string * c_ast
| CPlus of c_ast list
| ITimes of c_ast * c_ast
| CUminus of c_ast
and c_fcn = | Fcn of string * string * c_decl list * c_ast
val unparse_expr : Expr.expr -> string
val unparse_assignment : Expr.assignment -> string
val unparse_annotated : bool -> Annotate.annotated_schedule -> string
val unparse_decl : c_decl -> string
val unparse_ast : c_ast -> string
val unparse_function : c_fcn -> string
val flops_of : c_fcn -> string
val print_cost : c_fcn -> string
val ast_to_expr_list : c_ast -> Expr.expr list
val extract_constants : c_ast -> c_decl list
val ctimes : (c_ast * c_ast) -> c_ast

View File

@@ -0,0 +1,147 @@
(*
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*)
(* abstraction layer for complex operations *)
open Littlesimp
open Expr
(* type of complex expressions *)
type expr = CE of Expr.expr * Expr.expr
let two = CE (makeNum Number.two, makeNum Number.zero)
let one = CE (makeNum Number.one, makeNum Number.zero)
let i = CE (makeNum Number.zero, makeNum Number.one)
let zero = CE (makeNum Number.zero, makeNum Number.zero)
let make (r, i) = CE (r, i)
let uminus (CE (a, b)) = CE (makeUminus a, makeUminus b)
let inverse_int n = CE (makeNum (Number.div Number.one (Number.of_int n)),
makeNum Number.zero)
let inverse_int_sqrt n =
CE (makeNum (Number.div Number.one (Number.sqrt (Number.of_int n))),
makeNum Number.zero)
let int_sqrt n =
CE (makeNum (Number.sqrt (Number.of_int n)),
makeNum Number.zero)
let nan x = CE (NaN x, makeNum Number.zero)
let half = inverse_int 2
let times3x3 (CE (a, b)) (CE (c, d)) =
CE (makePlus [makeTimes (c, makePlus [a; makeUminus (b)]);
makeTimes (b, makePlus [c; makeUminus (d)])],
makePlus [makeTimes (a, makePlus [c; d]);
makeUminus(makeTimes (c, makePlus [a; makeUminus (b)]))])
let times (CE (a, b)) (CE (c, d)) =
if not !Magic.threemult then
CE (makePlus [makeTimes (a, c); makeUminus (makeTimes (b, d))],
makePlus [makeTimes (a, d); makeTimes (b, c)])
else if is_constant c && is_constant d then
times3x3 (CE (a, b)) (CE (c, d))
else (* hope a and b are constant expressions *)
times3x3 (CE (c, d)) (CE (a, b))
let ctimes (CE (a, _)) (CE (c, _)) =
CE (CTimes (a, c), makeNum Number.zero)
let ctimesj (CE (a, _)) (CE (c, _)) =
CE (CTimesJ (a, c), makeNum Number.zero)
(* complex exponential (of root of unity); returns exp(2*pi*i/n * m) *)
let exp n i =
let (c, s) = Number.cexp n i
in CE (makeNum c, makeNum s)
(* various trig functions evaluated at (2*pi*i/n * m) *)
let sec n m =
let (c, s) = Number.cexp n m
in CE (makeNum (Number.div Number.one c), makeNum Number.zero)
let csc n m =
let (c, s) = Number.cexp n m
in CE (makeNum (Number.div Number.one s), makeNum Number.zero)
let tan n m =
let (c, s) = Number.cexp n m
in CE (makeNum (Number.div s c), makeNum Number.zero)
let cot n m =
let (c, s) = Number.cexp n m
in CE (makeNum (Number.div c s), makeNum Number.zero)
(* complex sum *)
let plus a =
let rec unzip_complex = function
[] -> ([], [])
| ((CE (a, b)) :: s) ->
let (r,i) = unzip_complex s
in
(a::r), (b::i) in
let (c, d) = unzip_complex a in
CE (makePlus c, makePlus d)
(* extract real/imaginary *)
let real (CE (a, b)) = CE (a, makeNum Number.zero)
let imag (CE (a, b)) = CE (b, makeNum Number.zero)
let iimag (CE (a, b)) = CE (makeNum Number.zero, b)
let conj (CE (a, b)) = CE (a, makeUminus b)
(* abstraction of sum_{i=0}^{n-1} *)
let sigma a b f = plus (List.map f (Util.interval a b))
(* store and assignment operations *)
let store_real v (CE (a, b)) = Expr.Store (v, a)
let store_imag v (CE (a, b)) = Expr.Store (v, b)
let store (vr, vi) x = (store_real vr x, store_imag vi x)
let assign_real v (CE (a, b)) = Expr.Assign (v, a)
let assign_imag v (CE (a, b)) = Expr.Assign (v, b)
let assign (vr, vi) x = (assign_real vr x, assign_imag vi x)
(************************
shortcuts
************************)
let (@*) = times
let (@+) a b = plus [a; b]
let (@-) a b = plus [a; uminus b]
(* type of complex signals *)
type signal = int -> expr
(* make a finite signal infinite *)
let infinite n signal i = if ((0 <= i) && (i < n)) then signal i else zero
let hermitian n a =
Util.array n (fun i ->
if (i = 0) then real (a 0)
else if (i < n - i) then (a i)
else if (i > n - i) then conj (a (n - i))
else real (a i))
let antihermitian n a =
Util.array n (fun i ->
if (i = 0) then iimag (a 0)
else if (i < n - i) then (a i)
else if (i > n - i) then uminus (conj (a (n - i)))
else iimag (a i))

View File

@@ -0,0 +1,68 @@
(*
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*)
type expr
val make : (Expr.expr * Expr.expr) -> expr
val two : expr
val one : expr
val i : expr
val zero : expr
val half : expr
val inverse_int : int -> expr
val inverse_int_sqrt : int -> expr
val int_sqrt : int -> expr
val times : expr -> expr -> expr
val ctimes : expr -> expr -> expr
val ctimesj : expr -> expr -> expr
val uminus : expr -> expr
val exp : int -> int -> expr
val sec : int -> int -> expr
val csc : int -> int -> expr
val tan : int -> int -> expr
val cot : int -> int -> expr
val plus : expr list -> expr
val real : expr -> expr
val imag : expr -> expr
val conj : expr -> expr
val nan : Expr.transcendent -> expr
val sigma : int -> int -> (int -> expr) -> expr
val (@*) : expr -> expr -> expr
val (@+) : expr -> expr -> expr
val (@-) : expr -> expr -> expr
(* a signal is a map from integers to expressions *)
type signal = int -> expr
val infinite : int -> signal -> signal
val store_real : Variable.variable -> expr -> Expr.expr
val store_imag : Variable.variable -> expr -> Expr.expr
val store :
Variable.variable * Variable.variable -> expr -> Expr.expr * Expr.expr
val assign_real : Variable.variable -> expr -> Expr.assignment
val assign_imag : Variable.variable -> expr -> Expr.assignment
val assign :
Variable.variable * Variable.variable ->
expr -> Expr.assignment * Expr.assignment
val hermitian : int -> (int -> expr) -> int -> expr
val antihermitian : int -> (int -> expr) -> int -> expr

130
fftw-3.3.10/genfft/conv.ml Normal file
View File

@@ -0,0 +1,130 @@
(*
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*)
open Complex
open Util
let polyphase m a ph i = a (m * i + ph)
let rec divmod n i =
if (i < 0) then
let (a, b) = divmod n (i + n)
in (a - 1, b)
else (i / n, i mod n)
let unpolyphase m a i = let (x, y) = divmod m i in a y x
let lift2 f a b i = f (a i) (b i)
(* convolution of signals A and B *)
let rec conv na a nb b =
let rec naive na a nb b i =
sigma 0 na (fun j -> (a j) @* (b (i - j)))
and recur na a nb b =
if (na <= 1 || nb <= 1) then
naive na a nb b
else
let p = polyphase 2 in
let ee = conv (na - na / 2) (p a 0) (nb - nb / 2) (p b 0)
and eo = conv (na - na / 2) (p a 0) (nb / 2) (p b 1)
and oe = conv (na / 2) (p a 1) (nb - nb / 2) (p b 0)
and oo = conv (na / 2) (p a 1) (nb / 2) (p b 1) in
unpolyphase 2 (function
0 -> fun i -> (ee i) @+ (oo (i - 1))
| 1 -> fun i -> (eo i) @+ (oe i)
| _ -> failwith "recur")
(* Karatsuba variant 1: (a+bx)(c+dx) = (ac+bdxx)+((a+b)(c+d)-ac-bd)x *)
and karatsuba1 na a nb b =
let p = polyphase 2 in
let ae = p a 0 and nae = na - na / 2
and ao = p a 1 and nao = na / 2
and be = p b 0 and nbe = nb - nb / 2
and bo = p b 1 and nbo = nb / 2 in
let ae = infinite nae ae and ao = infinite nao ao
and be = infinite nbe be and bo = infinite nbo bo in
let aeo = lift2 (@+) ae ao and naeo = nae
and beo = lift2 (@+) be bo and nbeo = nbe in
let ee = conv nae ae nbe be
and oo = conv nao ao nbo bo
and eoeo = conv naeo aeo nbeo beo in
let q = function
0 -> fun i -> (ee i) @+ (oo (i - 1))
| 1 -> fun i -> (eoeo i) @- ((ee i) @+ (oo i))
| _ -> failwith "karatsuba1" in
unpolyphase 2 q
(* Karatsuba variant 2:
(a+bx)(c+dx) = ((a+b)c-b(c-dxx))+x((a+b)c-a(c-d)) *)
and karatsuba2 na a nb b =
let p = polyphase 2 in
let ae = p a 0 and nae = na - na / 2
and ao = p a 1 and nao = na / 2
and be = p b 0 and nbe = nb - nb / 2
and bo = p b 1 and nbo = nb / 2 in
let ae = infinite nae ae and ao = infinite nao ao
and be = infinite nbe be and bo = infinite nbo bo in
let c1 = conv nae (lift2 (@+) ae ao) nbe be
and c2 = conv nao ao (nbo + 1) (fun i -> be i @- bo (i - 1))
and c3 = conv nae ae nbe (lift2 (@-) be bo) in
let q = function
0 -> lift2 (@-) c1 c2
| 1 -> lift2 (@-) c1 c3
| _ -> failwith "karatsuba2" in
unpolyphase 2 q
and karatsuba na a nb b =
let m = na + nb - 1 in
if (m < !Magic.karatsuba_min) then
recur na a nb b
else
match !Magic.karatsuba_variant with
1 -> karatsuba1 na a nb b
| 2 -> karatsuba2 na a nb b
| _ -> failwith "unknown karatsuba variant"
and via_circular na a nb b =
let m = na + nb - 1 in
if (m < !Magic.circular_min) then
karatsuba na a nb b
else
let rec find_min n = if n >= m then n else find_min (2 * n) in
circular (find_min 1) a b
in
let a = infinite na a and b = infinite nb b in
let res = array (na + nb - 1) (via_circular na a nb b) in
infinite (na + nb - 1) res
and circular n a b =
let via_dft n a b =
let fa = Fft.dft (-1) n a
and fb = Fft.dft (-1) n b
and scale = inverse_int n in
let fab i = ((fa i) @* (fb i)) @* scale in
Fft.dft 1 n fab
in via_dft n a b

View File

@@ -0,0 +1,22 @@
(*
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*)
val conv : int -> Complex.signal -> int -> Complex.signal -> Complex.signal

109
fftw-3.3.10/genfft/dag.ml Normal file
View File

@@ -0,0 +1,109 @@
(*
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*)
open Util
(* Here, we have functions to transform a sequence of assignments
(variable = expression) into a DAG (a directed, acyclic graph).
The nodes of the DAG are the assignments, and the edges indicate
dependencies. (The DAG is analyzed in the scheduler to find an
efficient ordering of the assignments.)
This file also contains utilities to manipulate the DAG in various
ways. *)
(********************************************
* Dag structure
********************************************)
type color = RED | BLUE | BLACK | YELLOW
type dagnode =
{ assigned: Variable.variable;
mutable expression: Expr.expr;
input_variables: Variable.variable list;
mutable successors: dagnode list;
mutable predecessors: dagnode list;
mutable label: int;
mutable color: color}
type dag = Dag of (dagnode list)
(* true if node uses v *)
let node_uses v node =
List.exists (Variable.same v) node.input_variables
(* true if assignment of v clobbers any input of node *)
let node_clobbers node v =
List.exists (Variable.same_location v) node.input_variables
(* true if nodeb depends on nodea *)
let depends_on nodea nodeb =
node_uses nodea.assigned nodeb ||
node_clobbers nodea nodeb.assigned
(* transform an assignment list into a dag *)
let makedag alist =
let dag = List.map
(fun assignment ->
let (v, x) = assignment in
{ assigned = v;
expression = x;
input_variables = Expr.find_vars x;
successors = [];
predecessors = [];
label = 0;
color = BLACK })
alist
in begin
for_list dag (fun i ->
for_list dag (fun j ->
if depends_on i j then begin
i.successors <- j :: i.successors;
j.predecessors <- i :: j.predecessors;
end));
Dag dag;
end
let map f (Dag dag) = Dag (List.map f dag)
let for_all (Dag dag) f =
(* type system loophole *)
let make_unit _ = () in
make_unit (List.map f dag)
let to_list (Dag dag) = dag
let find_node f (Dag dag) = Util.find_elem f dag
(* breadth-first search *)
let rec bfs (Dag dag) node init_label =
let _ = node.label <- init_label in
let rec loop = function
[] -> ()
| node :: rest ->
let neighbors = node.predecessors @ node.successors in
let m = min_list (List.map (fun node -> node.label) neighbors) in
if (node.label > m + 1) then begin
node.label <- m + 1;
loop (rest @ neighbors);
end else
loop rest
in let neighbors = node.predecessors @ node.successors in
loop neighbors

View File

@@ -0,0 +1,43 @@
(*
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*)
open Util
type color = | RED | BLUE | BLACK | YELLOW
type dagnode =
{ assigned: Variable.variable;
mutable expression: Expr.expr;
input_variables: Variable.variable list;
mutable successors: dagnode list;
mutable predecessors: dagnode list;
mutable label: int;
mutable color: color}
type dag
val makedag : (Variable.variable * Expr.expr) list -> dag
val map : (dagnode -> dagnode) -> dag -> dag
val for_all : dag -> (dagnode -> unit) -> unit
val to_list : dag -> (dagnode list)
val bfs : dag -> dagnode -> int -> unit
val find_node : (dagnode -> bool) -> dag -> dagnode option

152
fftw-3.3.10/genfft/expr.ml Normal file
View File

@@ -0,0 +1,152 @@
(*
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*)
(* Here, we define the data type encapsulating a symbolic arithmetic
expression, and provide some routines for manipulating it. *)
(* I will regret this hack : *)
(* NEWS: I did *)
type transcendent = I | MULTI_A | MULTI_B | CONJ
type expr =
| Num of Number.number
| NaN of transcendent
| Plus of expr list
| Times of expr * expr
| CTimes of expr * expr
| CTimesJ of expr * expr (* CTimesJ (a, b) = conj(a) * b *)
| Uminus of expr
| Load of Variable.variable
| Store of Variable.variable * expr
type assignment = Assign of Variable.variable * expr
(* various hash functions *)
let hash_float x =
let (mantissa, exponent) = frexp x
in truncate (float_of_int(exponent) *. 1234.567 +. mantissa *. 10000.0)
let sum_list l = List.fold_right (+) l 0
let transcendent_to_float = function
| I -> 2.718281828459045235360287471 (* any transcendent number will do *)
| MULTI_A -> 0.6931471805599453094172321214
| MULTI_B -> -0.3665129205816643270124391582
| CONJ -> 0.6019072301972345747375400015
let rec hash = function
| Num x -> hash_float (Number.to_float x)
| NaN x -> hash_float (transcendent_to_float x)
| Load v -> 1 + 1237 * Variable.hash v
| Store (v, x) -> 2 * Variable.hash v - 2345 * hash x
| Plus l -> 5 + 23451 * sum_list (List.map Hashtbl.hash l)
| Times (a, b) -> 41 + 31415 * (Hashtbl.hash a + Hashtbl.hash b)
| CTimes (a, b) -> 49 + 3245 * (Hashtbl.hash a + Hashtbl.hash b)
| CTimesJ (a, b) -> 31 + 3471 * (Hashtbl.hash a + Hashtbl.hash b)
| Uminus x -> 42 + 12345 * (hash x)
(* find all variables *)
let rec find_vars x =
match x with
| Load y -> [y]
| Plus l -> List.flatten (List.map find_vars l)
| Times (a, b) -> (find_vars a) @ (find_vars b)
| CTimes (a, b) -> (find_vars a) @ (find_vars b)
| CTimesJ (a, b) -> (find_vars a) @ (find_vars b)
| Uminus a -> find_vars a
| _ -> []
(* TRUE if expression is a constant *)
let is_constant = function
| Num _ -> true
| NaN _ -> true
| Load v -> Variable.is_constant v
| _ -> false
let is_known_constant = function
| Num _ -> true
| NaN _ -> true
| _ -> false
(* expr to string, used for debugging *)
let rec foldr_string_concat l =
match l with
[] -> ""
| [a] -> a
| a :: b -> a ^ " " ^ (foldr_string_concat b)
let string_of_transcendent = function
| I -> "I"
| MULTI_A -> "MULTI_A"
| MULTI_B -> "MULTI_B"
| CONJ -> "CONJ"
let rec to_string = function
| Load v -> Variable.unparse v
| Num n -> string_of_float (Number.to_float n)
| NaN n -> string_of_transcendent n
| Plus x -> "(+ " ^ (foldr_string_concat (List.map to_string x)) ^ ")"
| Times (a, b) -> "(* " ^ (to_string a) ^ " " ^ (to_string b) ^ ")"
| CTimes (a, b) -> "(c* " ^ (to_string a) ^ " " ^ (to_string b) ^ ")"
| CTimesJ (a, b) -> "(cj* " ^ (to_string a) ^ " " ^ (to_string b) ^ ")"
| Uminus a -> "(- " ^ (to_string a) ^ ")"
| Store (v, a) -> "(:= " ^ (Variable.unparse v) ^ " " ^
(to_string a) ^ ")"
let rec to_string_a d x =
if (d = 0) then "..." else match x with
| Load v -> Variable.unparse v
| Num n -> Number.to_konst n
| NaN n -> string_of_transcendent n
| Plus x -> "(+ " ^ (foldr_string_concat (List.map (to_string_a (d - 1)) x)) ^ ")"
| Times (a, b) -> "(* " ^ (to_string_a (d - 1) a) ^ " " ^ (to_string_a (d - 1) b) ^ ")"
| CTimes (a, b) -> "(c* " ^ (to_string_a (d - 1) a) ^ " " ^ (to_string_a (d - 1) b) ^ ")"
| CTimesJ (a, b) -> "(cj* " ^ (to_string_a (d - 1) a) ^ " " ^ (to_string_a (d - 1) b) ^ ")"
| Uminus a -> "(- " ^ (to_string_a (d-1) a) ^ ")"
| Store (v, a) -> "(:= " ^ (Variable.unparse v) ^ " " ^
(to_string_a (d-1) a) ^ ")"
let to_string = to_string_a 10
let assignment_to_string = function
| Assign (v, a) -> "(:= " ^ (Variable.unparse v) ^ " " ^ (to_string a) ^ ")"
let dump print = List.iter (fun x -> print ((assignment_to_string x) ^ "\n"))
(* find all constants in a given expression *)
let rec expr_to_constants = function
| Num n -> [n]
| Plus a -> List.flatten (List.map expr_to_constants a)
| Times (a, b) -> (expr_to_constants a) @ (expr_to_constants b)
| CTimes (a, b) -> (expr_to_constants a) @ (expr_to_constants b)
| CTimesJ (a, b) -> (expr_to_constants a) @ (expr_to_constants b)
| Uminus a -> expr_to_constants a
| _ -> []
let add_float_key_value list_so_far k =
if List.exists (fun k2 -> Number.equal k k2) list_so_far then
list_so_far
else
k :: list_so_far
let unique_constants = List.fold_left add_float_key_value []

View File

@@ -0,0 +1,51 @@
(*
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*)
type transcendent = I | MULTI_A | MULTI_B | CONJ
type expr =
| Num of Number.number
| NaN of transcendent
| Plus of expr list
| Times of expr * expr
| CTimes of expr * expr
| CTimesJ of expr * expr
| Uminus of expr
| Load of Variable.variable
| Store of Variable.variable * expr
type assignment = Assign of Variable.variable * expr
val hash_float : float -> int
val hash : expr -> int
val to_string : expr -> string
val assignment_to_string : assignment -> string
val transcendent_to_float : transcendent -> float
val string_of_transcendent : transcendent -> string
val find_vars : expr -> Variable.variable list
val is_constant : expr -> bool
val is_known_constant : expr -> bool
val dump : (string -> unit) -> assignment list -> unit
val expr_to_constants : expr -> Number.number list
val unique_constants : Number.number list -> Number.number list

307
fftw-3.3.10/genfft/fft.ml Normal file
View File

@@ -0,0 +1,307 @@
(*
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*)
(* This is the part of the generator that actually computes the FFT
in symbolic form *)
open Complex
open Util
(* choose a suitable factor of n *)
let choose_factor n =
(* first choice: i such that gcd(i, n / i) = 1, i as big as possible *)
let choose1 n =
let rec loop i f =
if (i * i > n) then f
else if ((n mod i) == 0 && gcd i (n / i) == 1) then loop (i + 1) i
else loop (i + 1) f
in loop 1 1
(* second choice: the biggest factor i of n, where i < sqrt(n), if any *)
and choose2 n =
let rec loop i f =
if (i * i > n) then f
else if ((n mod i) == 0) then loop (i + 1) i
else loop (i + 1) f
in loop 1 1
in let i = choose1 n in
if (i > 1) then i
else choose2 n
let is_power_of_two n = (n > 0) && ((n - 1) land n == 0)
let rec dft_prime sign n input =
let sum filter i =
sigma 0 n (fun j ->
let coeff = filter (exp n (sign * i * j))
in coeff @* (input j)) in
let computation_even = array n (sum identity)
and computation_odd =
let sumr = array n (sum real)
and sumi = array n (sum ((times Complex.i) @@ imag)) in
array n (fun i ->
if (i = 0) then
(* expose some common subexpressions *)
input 0 @+
sigma 1 ((n + 1) / 2) (fun j -> input j @+ input (n - j))
else
let i' = min i (n - i) in
if (i < n - i) then
sumr i' @+ sumi i'
else
sumr i' @- sumi i') in
if (n >= !Magic.rader_min) then
dft_rader sign n input
else if (n == 2) then
computation_even
else
computation_odd
and dft_rader sign p input =
let half =
let one_half = inverse_int 2 in
times one_half
and make_product n a b =
let scale_factor = inverse_int n in
array n (fun i -> a i @* (scale_factor @* b i)) in
(* generates a convolution using ffts. (all arguments are the
same as to gen_convolution, below) *)
let gen_convolution_by_fft n a b addtoall =
let fft_a = dft 1 n a
and fft_b = dft 1 n b in
let fft_ab = make_product n fft_a fft_b
and dc_term i = if (i == 0) then addtoall else zero in
let fft_ab1 = array n (fun i -> fft_ab i @+ dc_term i)
and sum = fft_a 0 in
let conv = dft (-1) n fft_ab1 in
(sum, conv)
(* alternate routine for convolution. Seems to work better for
small sizes. I have no idea why. *)
and gen_convolution_by_fft_alt n a b addtoall =
let ap = array n (fun i -> half (a i @+ a ((n - i) mod n)))
and am = array n (fun i -> half (a i @- a ((n - i) mod n)))
and bp = array n (fun i -> half (b i @+ b ((n - i) mod n)))
and bm = array n (fun i -> half (b i @- b ((n - i) mod n)))
in
let fft_ap = dft 1 n ap
and fft_am = dft 1 n am
and fft_bp = dft 1 n bp
and fft_bm = dft 1 n bm in
let fft_abpp = make_product n fft_ap fft_bp
and fft_abpm = make_product n fft_ap fft_bm
and fft_abmp = make_product n fft_am fft_bp
and fft_abmm = make_product n fft_am fft_bm
and sum = fft_ap 0 @+ fft_am 0
and dc_term i = if (i == 0) then addtoall else zero in
let fft_ab1 = array n (fun i -> (fft_abpp i @+ fft_abmm i) @+ dc_term i)
and fft_ab2 = array n (fun i -> fft_abpm i @+ fft_abmp i) in
let conv1 = dft (-1) n fft_ab1
and conv2 = dft (-1) n fft_ab2 in
let conv = array n (fun i ->
conv1 i @+ conv2 i) in
(sum, conv)
(* generator of assignment list assigning conv to the convolution of
a and b, all of which are of length n. addtoall is added to
all of the elements of the result. Returns (sum, convolution) pair
where sum is the sum of the elements of a. *)
in let gen_convolution =
if (p <= !Magic.alternate_convolution) then
gen_convolution_by_fft_alt
else
gen_convolution_by_fft
(* fft generator for prime n = p using Rader's algorithm for
turning the fft into a convolution, which then can be
performed in a variety of ways *)
in
let g = find_generator p in
let ginv = pow_mod g (p - 2) p in
let input_perm = array p (fun i -> input (pow_mod g i p))
and omega_perm = array p (fun i -> exp p (sign * (pow_mod ginv i p)))
and output_perm = array p (fun i -> pow_mod ginv i p)
in let (sum, conv) =
(gen_convolution (p - 1) input_perm omega_perm (input 0))
in array p (fun i ->
if (i = 0) then
input 0 @+ sum
else
let i' = suchthat 0 (fun i' -> i = output_perm i')
in conv i')
(* our modified version of the conjugate-pair split-radix algorithm,
which reduces the number of multiplications by rescaling the
sub-transforms (power-of-two n's only) *)
and newsplit sign n input =
let rec s n k = (* recursive scale factor *)
if n <= 4 then
one
else
let k4 = (abs k) mod (n / 4) in
let k4' = if k4 <= (n / 8) then k4 else (n/4 - k4) in
(s (n / 4) k4') @* (real (exp n k4'))
and sinv n k = (* 1 / s(n,k) *)
if n <= 4 then
one
else
let k4 = (abs k) mod (n / 4) in
let k4' = if k4 <= (n / 8) then k4 else (n/4 - k4) in
(sinv (n / 4) k4') @* (sec n k4')
in let sdiv2 n k = (s n k) @* (sinv (2*n) k) (* s(n,k) / s(2*n,k) *)
and sdiv4 n k = (* s(n,k) / s(4*n,k) *)
let k4 = (abs k) mod n in
sec (4*n) (if k4 <= (n / 2) then k4 else (n - k4))
in let t n k = (exp n k) @* (sdiv4 (n/4) k)
and dft1 input = input
and dft2 input = array 2 (fun k -> (input 0) @+ ((input 1) @* exp 2 k))
in let rec newsplit0 sign n input =
if (n == 1) then dft1 input
else if (n == 2) then dft2 input
else let u = newsplit0 sign (n / 2) (fun i -> input (i*2))
and z = newsplitS sign (n / 4) (fun i -> input (i*4 + 1))
and z' = newsplitS sign (n / 4) (fun i -> input ((n + i*4 - 1) mod n))
and twid = array n (fun k -> s (n/4) k @* exp n (sign * k)) in
let w = array n (fun k -> twid k @* z (k mod (n / 4)))
and w' = array n (fun k -> conj (twid k) @* z' (k mod (n / 4))) in
let ww = array n (fun k -> w k @+ w' k) in
array n (fun k -> u (k mod (n / 2)) @+ ww k)
and newsplitS sign n input =
if (n == 1) then dft1 input
else if (n == 2) then dft2 input
else let u = newsplitS2 sign (n / 2) (fun i -> input (i*2))
and z = newsplitS sign (n / 4) (fun i -> input (i*4 + 1))
and z' = newsplitS sign (n / 4) (fun i -> input ((n + i*4 - 1) mod n)) in
let w = array n (fun k -> t n (sign * k) @* z (k mod (n / 4)))
and w' = array n (fun k -> conj (t n (sign * k)) @* z' (k mod (n / 4))) in
let ww = array n (fun k -> w k @+ w' k) in
array n (fun k -> u (k mod (n / 2)) @+ ww k)
and newsplitS2 sign n input =
if (n == 1) then dft1 input
else if (n == 2) then dft2 input
else let u = newsplitS4 sign (n / 2) (fun i -> input (i*2))
and z = newsplitS sign (n / 4) (fun i -> input (i*4 + 1))
and z' = newsplitS sign (n / 4) (fun i -> input ((n + i*4 - 1) mod n)) in
let w = array n (fun k -> t n (sign * k) @* z (k mod (n / 4)))
and w' = array n (fun k -> conj (t n (sign * k)) @* z' (k mod (n / 4))) in
let ww = array n (fun k -> (w k @+ w' k) @* (sdiv2 n k)) in
array n (fun k -> u (k mod (n / 2)) @+ ww k)
and newsplitS4 sign n input =
if (n == 1) then dft1 input
else if (n == 2) then
let f = dft2 input
in array 2 (fun k -> (f k) @* (sinv 8 k))
else let u = newsplitS2 sign (n / 2) (fun i -> input (i*2))
and z = newsplitS sign (n / 4) (fun i -> input (i*4 + 1))
and z' = newsplitS sign (n / 4) (fun i -> input ((n + i*4 - 1) mod n)) in
let w = array n (fun k -> t n (sign * k) @* z (k mod (n / 4)))
and w' = array n (fun k -> conj (t n (sign * k)) @* z' (k mod (n / 4))) in
let ww = array n (fun k -> w k @+ w' k) in
array n (fun k -> (u (k mod (n / 2)) @+ ww k) @* (sdiv4 n k))
in newsplit0 sign n input
and dft sign n input =
let rec cooley_tukey sign n1 n2 input =
let tmp1 =
array n2 (fun i2 ->
dft sign n1 (fun i1 -> input (i1 * n2 + i2))) in
let tmp2 =
array n1 (fun i1 ->
array n2 (fun i2 ->
exp n (sign * i1 * i2) @* tmp1 i2 i1)) in
let tmp3 = array n1 (fun i1 -> dft sign n2 (tmp2 i1)) in
(fun i -> tmp3 (i mod n1) (i / n1))
(*
* This is "exponent -1" split-radix by Dan Bernstein.
*)
and split_radix_dit sign n input =
let f0 = dft sign (n / 2) (fun i -> input (i * 2))
and f10 = dft sign (n / 4) (fun i -> input (i * 4 + 1))
and f11 = dft sign (n / 4) (fun i -> input ((n + i * 4 - 1) mod n)) in
let g10 = array n (fun k ->
exp n (sign * k) @* f10 (k mod (n / 4)))
and g11 = array n (fun k ->
exp n (- sign * k) @* f11 (k mod (n / 4))) in
let g1 = array n (fun k -> g10 k @+ g11 k) in
array n (fun k -> f0 (k mod (n / 2)) @+ g1 k)
and split_radix_dif sign n input =
let n2 = n / 2 and n4 = n / 4 in
let x0 = array n2 (fun i -> input i @+ input (i + n2))
and x10 = array n4 (fun i -> input i @- input (i + n2))
and x11 = array n4 (fun i ->
input (i + n4) @- input (i + n2 + n4)) in
let x1 k i =
exp n (k * i * sign) @* (x10 i @+ exp 4 (k * sign) @* x11 i) in
let f0 = dft sign n2 x0
and f1 = array 4 (fun k -> dft sign n4 (x1 k)) in
array n (fun k ->
if k mod 2 = 0 then f0 (k / 2)
else let k' = k mod 4 in f1 k' ((k - k') / 4))
and prime_factor sign n1 n2 input =
let tmp1 = array n2 (fun i2 ->
dft sign n1 (fun i1 -> input ((i1 * n2 + i2 * n1) mod n)))
in let tmp2 = array n1 (fun i1 ->
dft sign n2 (fun k2 -> tmp1 k2 i1))
in fun i -> tmp2 (i mod n1) (i mod n2)
in let algorithm sign n =
let r = choose_factor n in
if List.mem n !Magic.rader_list then
(* special cases *)
dft_rader sign n
else if (r == 1) then (* n is prime *)
dft_prime sign n
else if (gcd r (n / r)) == 1 then
prime_factor sign r (n / r)
else if (n mod 4 = 0 && n > 4) then
if !Magic.newsplit && is_power_of_two n then
newsplit sign n
else if !Magic.dif_split_radix then
split_radix_dif sign n
else
split_radix_dit sign n
else
cooley_tukey sign r (n / r)
in
array n (algorithm sign n input)

View File

@@ -0,0 +1,22 @@
(*
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*)
val dft : int -> int -> Complex.signal -> Complex.signal

View File

@@ -0,0 +1,186 @@
(*
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*)
open Util
open Genutil
open C
type ditdif = DIT | DIF
let ditdif = ref DIT
let usage = "Usage: " ^ Sys.argv.(0) ^ " -n <number> [ -dit | -dif ]"
let urs = ref Stride_variable
let speclist = [
"-dit",
Arg.Unit(fun () -> ditdif := DIT),
" generate a DIT codelet";
"-dif",
Arg.Unit(fun () -> ditdif := DIF),
" generate a DIF codelet";
"-with-rs",
Arg.String(fun x -> urs := arg_to_stride x),
" specialize for given R-stride";
]
let byi = Complex.times Complex.i
let byui = Complex.times (Complex.uminus Complex.i)
let sym n f i = if (i < n - i) then f i else Complex.conj (f i)
let shuffle_eo fe fo i = if i mod 2 == 0 then fe (i/2) else fo ((i-1)/2)
let generate n =
let rs = "rs"
and twarray = "W"
and m = "m" and mb = "mb" and me = "me" and ms = "ms"
(* the array names are from the point of view of the complex array
(output in R2C, input in C2R) *)
and arp = "Rp" (* real, positive *)
and aip = "Ip" (* imag, positive *)
and arm = "Rm" (* real, negative *)
and aim = "Im" (* imag, negative *)
in
let sign = !Genutil.sign
and name = !Magic.codelet_name
and byvl x = choose_simd x (ctimes (CVar "VL", x)) in
let (bytwiddle, num_twiddles, twdesc) = Twiddle.twiddle_policy 1 false in
let nt = num_twiddles n in
let byw = bytwiddle n sign (twiddle_array nt twarray) in
let vrs = either_stride (!urs) (C.SVar rs) in
(* assume a single location. No point in doing alias analysis *)
let the_location = (Unique.make (), Unique.make ()) in
let locations _ = the_location in
let locr = (locative_array_c n
(C.array_subscript arp vrs)
(C.array_subscript arm vrs)
locations "BUG")
and loci = (locative_array_c n
(C.array_subscript aip vrs)
(C.array_subscript aim vrs)
locations "BUG")
and locp = (locative_array_c n
(C.array_subscript arp vrs)
(C.array_subscript aip vrs)
locations "BUG")
and locm = (locative_array_c n
(C.array_subscript arm vrs)
(C.array_subscript aim vrs)
locations "BUG")
in
let locri i = if i mod 2 == 0 then locr (i/2) else loci ((i-1)/2)
and locpm i = if i < n - i then locp i else locm (n-1-i)
in
let asch =
match !ditdif with
| DIT ->
let output = Fft.dft sign n (byw (load_array_c n locri)) in
let odag = store_array_c n locpm (sym n output) in
standard_optimizer odag
| DIF ->
let output = byw (Fft.dft sign n (sym n (load_array_c n locpm))) in
let odag = store_array_c n locri output in
standard_optimizer odag
in
let vms = CVar "ms"
and varp = CVar arp
and vaip = CVar aip
and varm = CVar arm
and vaim = CVar aim
and vm = CVar m and vmb = CVar mb and vme = CVar me
in
let body = Block (
[Decl ("INT", m)],
[For (list_to_comma
[Expr_assign (vm, vmb);
Expr_assign (CVar twarray,
CPlus [CVar twarray;
ctimes (CPlus [vmb; CUminus (Integer 1)],
Integer nt)])],
Binop (" < ", vm, vme),
list_to_comma
[Expr_assign (vm, CPlus [vm; byvl (Integer 1)]);
Expr_assign (varp, CPlus [varp; byvl vms]);
Expr_assign (vaip, CPlus [vaip; byvl vms]);
Expr_assign (varm, CPlus [varm; CUminus (byvl vms)]);
Expr_assign (vaim, CPlus [vaim; CUminus (byvl vms)]);
Expr_assign (CVar twarray, CPlus [CVar twarray;
byvl (Integer nt)]);
make_volatile_stride (4*n) (CVar rs)
],
Asch asch)])
in
let tree =
Fcn ("static void", name,
[Decl (C.realtypep, arp);
Decl (C.realtypep, aip);
Decl (C.realtypep, arm);
Decl (C.realtypep, aim);
Decl (C.constrealtypep, twarray);
Decl (C.stridetype, rs);
Decl ("INT", mb);
Decl ("INT", me);
Decl ("INT", ms)],
finalize_fcn body)
in
let twinstr =
Printf.sprintf "static const tw_instr twinstr[] = %s;\n\n"
(twinstr_to_string "VL" (twdesc n))
and desc =
Printf.sprintf
"static const hc2c_desc desc = {%d, \"%s\", twinstr, &GENUS, %s};\n\n"
n name (flops_of tree)
and register = "X(khc2c_register)"
in
let init =
"\n" ^
twinstr ^
desc ^
(declare_register_fcn name) ^
(Printf.sprintf "{\n%s(p, %s, &desc, HC2C_VIA_RDFT);\n}" register name)
in
(unparse tree) ^ "\n" ^ init
let main () =
begin
parse (speclist @ Twiddle.speclist) usage;
print_string (generate (check_size ()));
end
let _ = main()

View File

@@ -0,0 +1,208 @@
(*
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*)
open Util
open Genutil
open C
type ditdif = DIT | DIF
let ditdif = ref DIT
let usage = "Usage: " ^ Sys.argv.(0) ^ " -n <number> [ -dit | -dif ]"
let urs = ref Stride_variable
let ums = ref Stride_variable
let speclist = [
"-dit",
Arg.Unit(fun () -> ditdif := DIT),
" generate a DIT codelet";
"-dif",
Arg.Unit(fun () -> ditdif := DIF),
" generate a DIF codelet";
"-with-rs",
Arg.String(fun x -> urs := arg_to_stride x),
" specialize for given R-stride";
"-with-ms",
Arg.String(fun x -> ums := arg_to_stride x),
" specialize for given ms"
]
let byi = Complex.times Complex.i
let byui = Complex.times (Complex.uminus Complex.i)
let shuffle_eo fe fo i = if i mod 2 == 0 then fe (i/2) else fo ((i-1)/2)
let generate n =
let rs = "rs"
and twarray = "W"
and m = "m" and mb = "mb" and me = "me" and ms = "ms"
(* the array names are from the point of view of the complex array
(output in R2C, input in C2R) *)
and arp = "Rp" (* real, positive *)
and aip = "Ip" (* imag, positive *)
and arm = "Rm" (* real, negative *)
and aim = "Im" (* imag, negative *)
in
let sign = !Genutil.sign
and name = !Magic.codelet_name
and byvl x = choose_simd x (ctimes (CVar "VL", x)) in
let (bytwiddle, num_twiddles, twdesc) = Twiddle.twiddle_policy 1 false in
let nt = num_twiddles n in
let byw = bytwiddle n sign (twiddle_array nt twarray) in
let vrs = either_stride (!urs) (C.SVar rs) in
(* assume a single location. No point in doing alias analysis *)
let the_location = (Unique.make (), Unique.make ()) in
let locations _ = the_location in
let rlocp = (locative_array_c n
(C.array_subscript arp vrs)
(C.array_subscript aip vrs)
locations "BUG")
and rlocm = (locative_array_c n
(C.array_subscript arm vrs)
(C.array_subscript aim vrs)
locations "BUG")
and clocp = (locative_array_c n
(C.array_subscript arp vrs)
(C.array_subscript aip vrs)
locations "BUG")
and clocm = (locative_array_c n
(C.array_subscript arm vrs)
(C.array_subscript aim vrs)
locations "BUG")
in
let rloc i = if i mod 2 == 0 then rlocp (i/2) else rlocm ((i-1)/2)
and cloc i = if i < n - i then clocp i else clocm (n-1-i)
and sym n f i = if (i < n - i) then f i else Complex.conj (f i)
and sym1 f i =
if i mod 2 == 0 then
Complex.plus [f i; Complex.conj (f (i+1))]
else
Complex.times (Complex.uminus Complex.i)
(Complex.plus [f (i-1); Complex.uminus (Complex.conj (f i))])
and sym1i f i =
if i mod 2 == 0 then
Complex.plus [f i; Complex.times Complex.i (f (i+1))]
else
Complex.conj
(Complex.plus [f (i-1);
Complex.times (Complex.uminus Complex.i) (f i)])
in
let asch =
match !ditdif with
| DIT ->
let output =
(Complex.times Complex.half) @@
(Fft.dft sign n (byw (sym1 (load_array_c n rloc)))) in
let odag = store_array_c n cloc (sym n output) in
standard_optimizer odag
| DIF ->
let output =
byw (Fft.dft sign n (sym n (load_array_c n cloc)))
in
let odag = store_array_c n rloc (sym1i output) in
standard_optimizer odag
in
let vms = CVar "ms"
and varp = CVar arp
and vaip = CVar aip
and varm = CVar arm
and vaim = CVar aim
and vm = CVar m and vmb = CVar mb and vme = CVar me
in
let body = Block (
[Decl ("INT", m)],
[For (list_to_comma
[Expr_assign (vm, vmb);
Expr_assign (CVar twarray,
CPlus [CVar twarray;
ctimes (CPlus [vmb; CUminus (Integer 1)],
Integer nt)])],
Binop (" < ", vm, vme),
list_to_comma
[Expr_assign (vm, CPlus [vm; byvl (Integer 1)]);
Expr_assign (varp, CPlus [varp; byvl vms]);
Expr_assign (vaip, CPlus [vaip; byvl vms]);
Expr_assign (varm, CPlus [varm; CUminus (byvl vms)]);
Expr_assign (vaim, CPlus [vaim; CUminus (byvl vms)]);
Expr_assign (CVar twarray, CPlus [CVar twarray;
byvl (Integer nt)]);
make_volatile_stride (4*n) (CVar rs)
],
Asch asch)]
)
in
let tree =
Fcn ("static void", name,
[Decl (C.realtypep, arp);
Decl (C.realtypep, aip);
Decl (C.realtypep, arm);
Decl (C.realtypep, aim);
Decl (C.constrealtypep, twarray);
Decl (C.stridetype, rs);
Decl ("INT", mb);
Decl ("INT", me);
Decl ("INT", ms)],
finalize_fcn body)
in
let twinstr =
Printf.sprintf "static const tw_instr twinstr[] = %s;\n\n"
(twinstr_to_string "VL" (twdesc n))
and desc =
Printf.sprintf
"static const hc2c_desc desc = {%d, \"%s\", twinstr, &GENUS, %s};\n\n"
n name (flops_of tree)
and register = "X(khc2c_register)"
in
let init =
"\n" ^
twinstr ^
desc ^
(declare_register_fcn name) ^
(Printf.sprintf "{\n%s(p, %s, &desc, HC2C_VIA_DFT);\n}" register name)
in
(unparse tree) ^ "\n" ^ init
let main () =
begin
parse (speclist @ Twiddle.speclist) usage;
print_string (generate (check_size ()));
end
let _ = main()

View File

@@ -0,0 +1,221 @@
(*
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*)
open Util
open Genutil
open C
type ditdif = DIT | DIF
let ditdif = ref DIT
let usage = "Usage: " ^ Sys.argv.(0) ^ " -n <number> [ -dit | -dif ]"
let urs = ref Stride_variable
let ums = ref Stride_variable
let speclist = [
"-dit",
Arg.Unit(fun () -> ditdif := DIT),
" generate a DIT codelet";
"-dif",
Arg.Unit(fun () -> ditdif := DIF),
" generate a DIF codelet";
"-with-rs",
Arg.String(fun x -> urs := arg_to_stride x),
" specialize for given R-stride";
"-with-ms",
Arg.String(fun x -> ums := arg_to_stride x),
" specialize for given ms"
]
let byi = Complex.times Complex.i
let byui = Complex.times (Complex.uminus Complex.i)
let shuffle_eo fe fo i = if i mod 2 == 0 then fe (i/2) else fo ((i-1)/2)
let generate n =
let rs = "rs"
and twarray = "W"
and m = "m" and mb = "mb" and me = "me" and ms = "ms"
(* the array names are from the point of view of the complex array
(output in R2C, input in C2R) *)
and arp = "Rp" (* real, positive *)
and aip = "Ip" (* imag, positive *)
and arm = "Rm" (* real, negative *)
and aim = "Im" (* imag, negative *)
in
let sign = !Genutil.sign
and name = !Magic.codelet_name
and byvl x = choose_simd x (ctimes (CVar "VL", x))
and bytwvl x = choose_simd x (ctimes (CVar "TWVL", x))
and bytwvl_vl x = choose_simd x (ctimes (CVar "(TWVL/VL)", x)) in
let (bytwiddle, num_twiddles, twdesc) = Twiddle.twiddle_policy 1 true in
let nt = num_twiddles n in
let byw = bytwiddle n sign (twiddle_array nt twarray) in
let vrs = either_stride (!urs) (C.SVar rs) in
let sms = stride_to_string "ms" !ums in
let msms = "-" ^ sms in
(* assume a single location. No point in doing alias analysis *)
let the_location = (Unique.make (), Unique.make ()) in
let locations _ = the_location in
let rlocp = (locative_array_c n
(C.array_subscript arp vrs)
(C.array_subscript aip vrs)
locations sms)
and rlocm = (locative_array_c n
(C.array_subscript arm vrs)
(C.array_subscript aim vrs)
locations msms)
and clocp = (locative_array_c n
(C.array_subscript arp vrs)
(C.array_subscript aip vrs)
locations sms)
and clocm = (locative_array_c n
(C.array_subscript arm vrs)
(C.array_subscript aim vrs)
locations msms)
in
let rloc i = if i mod 2 == 0 then rlocp (i/2) else rlocm ((i-1)/2)
and cloc i = if i < n - i then clocp i else clocm (n-1-i)
and sym n f i =
if (i < n - i) then
f i
else
Complex.times (Complex.nan Expr.CONJ) (f i)
and sym1 f i =
if i mod 2 == 0 then
Complex.plus [f i;
Complex.times (Complex.nan Expr.CONJ) (f (i+1))]
else
Complex.times (Complex.nan Expr.I)
(Complex.plus [Complex.uminus (f (i-1));
Complex.times (Complex.nan Expr.CONJ) (f i)])
and sym1i f i =
if i mod 2 == 0 then
Complex.plus [f i;
Complex.times (Complex.nan Expr.I) (f (i+1))]
else
Complex.times (Complex.nan Expr.CONJ)
(Complex.plus [f (i-1);
Complex.uminus
(Complex.times (Complex.nan Expr.I) (f i))])
in
let asch =
match !ditdif with
| DIT ->
let output =
(Complex.times Complex.half) @@
(Trig.dft_via_rdft sign n (byw (sym1 (load_array_r n rloc)))) in
let odag = store_array_r n cloc (sym n output) in
standard_optimizer odag
| DIF ->
let output =
byw (Trig.dft_via_rdft sign n (sym n (load_array_r n cloc)))
in
let odag = store_array_r n rloc (sym1i output) in
standard_optimizer odag
in
let vms = CVar sms
and varp = CVar arp
and vaip = CVar aip
and varm = CVar arm
and vaim = CVar aim
and vm = CVar m and vmb = CVar mb and vme = CVar me
in
let body = Block (
[Decl ("INT", m)],
[For (list_to_comma
[Expr_assign (vm, vmb);
Expr_assign (CVar twarray,
CPlus [CVar twarray;
ctimes (CPlus [vmb; CUminus (Integer 1)],
bytwvl_vl (Integer nt))])],
Binop (" < ", vm, vme),
list_to_comma
[Expr_assign (vm, CPlus [vm; byvl (Integer 1)]);
Expr_assign (varp, CPlus [varp; byvl vms]);
Expr_assign (vaip, CPlus [vaip; byvl vms]);
Expr_assign (varm, CPlus [varm; CUminus (byvl vms)]);
Expr_assign (vaim, CPlus [vaim; CUminus (byvl vms)]);
Expr_assign (CVar twarray, CPlus [CVar twarray;
bytwvl (Integer nt)]);
make_volatile_stride (4*n) (CVar rs)
],
Asch asch)]
)
in
let tree =
Fcn ("static void", name,
[Decl (C.realtypep, arp);
Decl (C.realtypep, aip);
Decl (C.realtypep, arm);
Decl (C.realtypep, aim);
Decl (C.constrealtypep, twarray);
Decl (C.stridetype, rs);
Decl ("INT", mb);
Decl ("INT", me);
Decl ("INT", ms)],
finalize_fcn body)
in
let twinstr =
Printf.sprintf "static const tw_instr twinstr[] = %s;\n\n"
(twinstr_to_string "VL" (twdesc n))
and desc =
Printf.sprintf
"static const hc2c_desc desc = {%d, %s, twinstr, &GENUS, %s};\n\n"
n (stringify name) (flops_of tree)
and register = "X(khc2c_register)"
in
let init =
"\n" ^
twinstr ^
desc ^
(declare_register_fcn name) ^
(Printf.sprintf "{\n%s(p, %s, &desc, HC2C_VIA_DFT);\n}" register name)
in
(unparse tree) ^ "\n" ^ init
let main () =
begin
Simdmagic.simd_mode := true;
parse (speclist @ Twiddle.speclist) usage;
print_string (generate (check_size ()));
end
let _ = main()

View File

@@ -0,0 +1,170 @@
(*
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*)
open Util
open Genutil
open C
type ditdif = DIT | DIF
let ditdif = ref DIT
let usage = "Usage: " ^ Sys.argv.(0) ^ " -n <number> [ -dit | -dif ]"
let urs = ref Stride_variable
let speclist = [
"-dit",
Arg.Unit(fun () -> ditdif := DIT),
" generate a DIT codelet";
"-dif",
Arg.Unit(fun () -> ditdif := DIF),
" generate a DIF codelet";
"-with-rs",
Arg.String(fun x -> urs := arg_to_stride x),
" specialize for given R-stride";
]
let rioarray = "cr"
and iioarray = "ci"
let genone sign n transform load store vrs =
let locations = unique_array_c n in
let input =
locative_array_c n
(C.array_subscript rioarray vrs)
(C.array_subscript iioarray vrs)
locations "BUG" in
let output = transform sign n (load n input) in
let ioloc =
locative_array_c n
(C.array_subscript rioarray vrs)
(C.array_subscript iioarray vrs)
locations "BUG" in
let odag = store n ioloc output in
let annot = standard_optimizer odag
in annot
let byi = Complex.times Complex.i
let byui = Complex.times (Complex.uminus Complex.i)
let sym1 n f i =
Complex.plus [Complex.real (f i); byi (Complex.imag (f (n - 1 - i)))]
let sym2 n f i = if (i < n - i) then f i else byi (f i)
let sym2i n f i = if (i < n - i) then f i else byui (f i)
let generate n =
let rs = "rs"
and twarray = "W"
and m = "m" and mb = "mb" and me = "me" and ms = "ms" in
let sign = !Genutil.sign
and name = !Magic.codelet_name
and byvl x = choose_simd x (ctimes (CVar "VL", x)) in
let (bytwiddle, num_twiddles, twdesc) = Twiddle.twiddle_policy 1 false in
let nt = num_twiddles n in
let byw = bytwiddle n sign (twiddle_array nt twarray) in
let vrs = either_stride (!urs) (C.SVar rs) in
let asch =
match !ditdif with
| DIT ->
genone sign n
(fun sign n input ->
((sym1 n) @@ (sym2 n)) (Fft.dft sign n (byw input)))
load_array_c store_array_c vrs
| DIF ->
genone sign n
(fun sign n input ->
byw (Fft.dft sign n (((sym2i n) @@ (sym1 n)) input)))
load_array_c store_array_c vrs
in
let vms = CVar "ms"
and vrioarray = CVar rioarray
and viioarray = CVar iioarray
and vm = CVar m and vmb = CVar mb and vme = CVar me
in
let body = Block (
[Decl ("INT", m)],
[For (list_to_comma
[Expr_assign (vm, vmb);
Expr_assign (CVar twarray,
CPlus [CVar twarray;
ctimes (CPlus [vmb; CUminus (Integer 1)],
Integer nt)])],
Binop (" < ", vm, vme),
list_to_comma
[Expr_assign (vm, CPlus [vm; byvl (Integer 1)]);
Expr_assign (vrioarray, CPlus [vrioarray; byvl vms]);
Expr_assign (viioarray,
CPlus [viioarray; CUminus (byvl vms)]);
Expr_assign (CVar twarray, CPlus [CVar twarray;
byvl (Integer nt)]);
make_volatile_stride (2*n) (CVar rs)
],
Asch asch)])
in
let tree =
Fcn ("static void", name,
[Decl (C.realtypep, rioarray);
Decl (C.realtypep, iioarray);
Decl (C.constrealtypep, twarray);
Decl (C.stridetype, rs);
Decl ("INT", mb);
Decl ("INT", me);
Decl ("INT", ms)],
finalize_fcn body)
in
let twinstr =
Printf.sprintf "static const tw_instr twinstr[] = %s;\n\n"
(twinstr_to_string "VL" (twdesc n))
and desc =
Printf.sprintf
"static const hc2hc_desc desc = {%d, \"%s\", twinstr, &GENUS, %s};\n\n"
n name (flops_of tree)
and register = "X(khc2hc_register)"
in
let init =
"\n" ^
twinstr ^
desc ^
(declare_register_fcn name) ^
(Printf.sprintf "{\n%s(p, %s, &desc);\n}" register name)
in
(unparse tree) ^ "\n" ^ init
let main () =
begin
parse (speclist @ Twiddle.speclist) usage;
print_string (generate (check_size ()));
end
let _ = main()

View File

@@ -0,0 +1,257 @@
(*
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*)
(* generation of trigonometric transforms *)
open Util
open Genutil
open C
let usage = "Usage: " ^ Sys.argv.(0) ^ " -n <number>"
let uistride = ref Stride_variable
let uostride = ref Stride_variable
let uivstride = ref Stride_variable
let uovstride = ref Stride_variable
let normalization = ref 1
type mode =
| MDCT
| MDCT_MP3
| MDCT_VORBIS
| MDCT_WINDOW
| MDCT_WINDOW_SYM
| IMDCT
| IMDCT_MP3
| IMDCT_VORBIS
| IMDCT_WINDOW
| IMDCT_WINDOW_SYM
| NONE
let mode = ref NONE
let speclist = [
"-with-istride",
Arg.String(fun x -> uistride := arg_to_stride x),
" specialize for given input stride";
"-with-ostride",
Arg.String(fun x -> uostride := arg_to_stride x),
" specialize for given output stride";
"-with-ivstride",
Arg.String(fun x -> uivstride := arg_to_stride x),
" specialize for given input vector stride";
"-with-ovstride",
Arg.String(fun x -> uovstride := arg_to_stride x),
" specialize for given output vector stride";
"-normalization",
Arg.String(fun x -> normalization := int_of_string x),
" normalization integer to divide by";
"-mdct",
Arg.Unit(fun () -> mode := MDCT),
" generate an MDCT codelet";
"-mdct-mp3",
Arg.Unit(fun () -> mode := MDCT_MP3),
" generate an MDCT codelet with MP3 windowing";
"-mdct-window",
Arg.Unit(fun () -> mode := MDCT_WINDOW),
" generate an MDCT codelet with window array";
"-mdct-window-sym",
Arg.Unit(fun () -> mode := MDCT_WINDOW_SYM),
" generate an MDCT codelet with symmetric window array";
"-imdct",
Arg.Unit(fun () -> mode := IMDCT),
" generate an IMDCT codelet";
"-imdct-mp3",
Arg.Unit(fun () -> mode := IMDCT_MP3),
" generate an IMDCT codelet with MP3 windowing";
"-imdct-window",
Arg.Unit(fun () -> mode := IMDCT_WINDOW),
" generate an IMDCT codelet with window array";
"-imdct-window-sym",
Arg.Unit(fun () -> mode := IMDCT_WINDOW_SYM),
" generate an IMDCT codelet with symmetric window array";
]
let unity_window n i = Complex.one
(* MP3 window(k) = sin(pi/(2n) * (k + 1/2)) *)
let mp3_window n k =
Complex.imag (Complex.exp (8 * n) (2*k + 1))
(* Vorbis window(k) = sin(pi/2 * (mp3_window(k))^2)
... this is transcendental, though, so we can't do it with our
current Complex.exp function *)
let window_array n w =
array n (fun i ->
let stride = C.SInteger 1
and klass = Unique.make () in
let refr = C.array_subscript w stride i in
let kr = Variable.make_constant klass refr in
load_r (kr, kr))
let load_window w n i = w i
let load_window_sym w n i = w (if (i < n) then i else (2*n - 1 - i))
(* fixme: use same locations for input and output so that it works in-place? *)
(* Note: only correct for even n! *)
let load_array_mdct window n rarr iarr locations =
let twon = 2 * n in
let arr = load_array_c twon
(locative_array_c twon rarr iarr locations "BUG") in
let arrw = fun i -> Complex.times (window n i) (arr i) in
array n
((Complex.times Complex.half) @@
(fun i ->
if (i < n/2) then
Complex.uminus (Complex.plus [arrw (i + n + n/2);
arrw (n + n/2 - 1 - i)])
else
Complex.plus [arrw (i - n/2);
Complex.uminus (arrw (n + n/2 - 1 - i))]))
let store_array_mdct window n rarr iarr locations arr =
store_array_r n (locative_array_c n rarr iarr locations "BUG") arr
let load_array_imdct window n rarr iarr locations =
load_array_c n (locative_array_c n rarr iarr locations "BUG")
let store_array_imdct window n rarr iarr locations arr =
let n2 = n/2 in
let threen2 = 3*n2 in
let arr2 = fun i ->
if (i < n2) then
arr (i + n2)
else if (i < threen2) then
Complex.uminus (arr (threen2 - 1 - i))
else
Complex.uminus (arr (i - threen2))
in
let arr2w = fun i -> Complex.times (window n i) (arr2 i) in
let twon = 2 * n in
store_array_r twon (locative_array_c twon rarr iarr locations "BUG") arr2w
let window_param = function
MDCT_WINDOW -> true
| MDCT_WINDOW_SYM -> true
| IMDCT_WINDOW -> true
| IMDCT_WINDOW_SYM -> true
| _ -> false
let generate n mode =
let iarray = "I"
and oarray = "O"
and istride = "istride"
and ostride = "ostride"
and window = "W"
and name = !Magic.codelet_name in
let vistride = either_stride (!uistride) (C.SVar istride)
and vostride = either_stride (!uostride) (C.SVar ostride)
in
let sivs = stride_to_string "ovs" !uovstride in
let sovs = stride_to_string "ivs" !uivstride in
let (transform, load_input, store_output) = match mode with
| MDCT -> Trig.dctIV, load_array_mdct unity_window,
store_array_mdct unity_window
| MDCT_MP3 -> Trig.dctIV, load_array_mdct mp3_window,
store_array_mdct unity_window
| MDCT_WINDOW -> Trig.dctIV, load_array_mdct
(load_window (window_array (2 * n) window)),
store_array_mdct unity_window
| MDCT_WINDOW_SYM -> Trig.dctIV, load_array_mdct
(load_window_sym (window_array n window)),
store_array_mdct unity_window
| IMDCT -> Trig.dctIV, load_array_imdct unity_window,
store_array_imdct unity_window
| IMDCT_MP3 -> Trig.dctIV, load_array_imdct unity_window,
store_array_imdct mp3_window
| IMDCT_WINDOW -> Trig.dctIV, load_array_imdct unity_window,
store_array_imdct (load_window (window_array (2 * n) window))
| IMDCT_WINDOW_SYM -> Trig.dctIV, load_array_imdct unity_window,
store_array_imdct (load_window_sym (window_array n window))
| _ -> failwith "must specify transform kind"
in
let locations = unique_array_c (2*n) in
let input =
load_input n
(C.array_subscript iarray vistride)
(C.array_subscript "BUG" vistride)
locations
in
let output = (Complex.times (Complex.inverse_int !normalization))
@@ (transform n input) in
let odag =
store_output n
(C.array_subscript oarray vostride)
(C.array_subscript "BUG" vostride)
locations
output
in
let annot = standard_optimizer odag in
let tree =
Fcn ("void", name,
([Decl (C.constrealtypep, iarray);
Decl (C.realtypep, oarray)]
@ (if stride_fixed !uistride then []
else [Decl (C.stridetype, istride)])
@ (if stride_fixed !uostride then []
else [Decl (C.stridetype, ostride)])
@ (choose_simd []
(if stride_fixed !uivstride then [] else
[Decl ("int", sivs)]))
@ (choose_simd []
(if stride_fixed !uovstride then [] else
[Decl ("int", sovs)]))
@ (if (not (window_param mode)) then []
else [Decl (C.constrealtypep, window)])
),
finalize_fcn (Asch annot))
in
(unparse tree) ^ "\n"
let main () =
begin
parse speclist usage;
print_string (generate (check_size ()) !mode);
end
let _ = main()

View File

@@ -0,0 +1,168 @@
(*
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*)
open Util
open Genutil
open C
let usage = "Usage: " ^ Sys.argv.(0) ^ " -n <number>"
let uistride = ref Stride_variable
let uostride = ref Stride_variable
let uivstride = ref Stride_variable
let uovstride = ref Stride_variable
let speclist = [
"-with-istride",
Arg.String(fun x -> uistride := arg_to_stride x),
" specialize for given input stride";
"-with-ostride",
Arg.String(fun x -> uostride := arg_to_stride x),
" specialize for given output stride";
"-with-ivstride",
Arg.String(fun x -> uivstride := arg_to_stride x),
" specialize for given input vector stride";
"-with-ovstride",
Arg.String(fun x -> uovstride := arg_to_stride x),
" specialize for given output vector stride"
]
let nonstandard_optimizer list_of_buddy_stores dag =
let sched = standard_scheduler dag in
let annot = Annotate.annotate list_of_buddy_stores sched in
let _ = dump_asched annot in
annot
let generate n =
let riarray = "ri"
and iiarray = "ii"
and roarray = "ro"
and ioarray = "io"
and istride = "is"
and ostride = "os"
and i = "i"
and v = "v"
in
let sign = !Genutil.sign
and name = !Magic.codelet_name
and byvl x = choose_simd x (ctimes (CVar "(2 * VL)", x)) in
let ename = expand_name name in
let vistride = either_stride (!uistride) (C.SVar istride)
and vostride = either_stride (!uostride) (C.SVar ostride)
in
let sovs = stride_to_string "ovs" !uovstride in
let sivs = stride_to_string "ivs" !uivstride in
let locations = unique_array_c n in
let input =
locative_array_c n
(C.array_subscript riarray vistride)
(C.array_subscript iiarray vistride)
locations sivs in
let output = Fft.dft sign n (load_array_c n input) in
let oloc =
locative_array_c n
(C.array_subscript roarray vostride)
(C.array_subscript ioarray vostride)
locations sovs in
let list_of_buddy_stores =
let k = !Simdmagic.store_multiple in
if (k > 1) then
if (n mod k == 0) then
List.append
(List.map
(fun i -> List.map (fun j -> (fst (oloc (k * i + j)))) (iota k))
(iota (n / k)))
(List.map
(fun i -> List.map (fun j -> (snd (oloc (k * i + j)))) (iota k))
(iota (n / k)))
else failwith "invalid n for -store-multiple"
else []
in
let odag = store_array_c n oloc output in
let annot = nonstandard_optimizer list_of_buddy_stores odag in
let body = Block (
[Decl ("INT", i)],
[For (Expr_assign (CVar i, CVar v),
Binop (" > ", CVar i, Integer 0),
list_to_comma
[Expr_assign (CVar i, CPlus [CVar i; CUminus (byvl (Integer 1))]);
Expr_assign (CVar riarray, CPlus [CVar riarray;
byvl (CVar sivs)]);
Expr_assign (CVar iiarray, CPlus [CVar iiarray;
byvl (CVar sivs)]);
Expr_assign (CVar roarray, CPlus [CVar roarray;
byvl (CVar sovs)]);
Expr_assign (CVar ioarray, CPlus [CVar ioarray;
byvl (CVar sovs)]);
make_volatile_stride (4*n) (CVar istride);
make_volatile_stride (4*n) (CVar ostride)
],
Asch annot)
])
in
let tree =
Fcn ((if !Magic.standalone then "void" else "static void"), ename,
([Decl (C.constrealtypep, riarray);
Decl (C.constrealtypep, iiarray);
Decl (C.realtypep, roarray);
Decl (C.realtypep, ioarray);
Decl (C.stridetype, istride);
Decl (C.stridetype, ostride);
Decl ("INT", v);
Decl ("INT", "ivs");
Decl ("INT", "ovs")]),
finalize_fcn body)
in let desc =
Printf.sprintf
"static const kdft_desc desc = { %d, %s, %s, &GENUS, %s, %s, %s, %s };\n"
n (stringify name) (flops_of tree)
(stride_to_solverparm !uistride) (stride_to_solverparm !uostride)
(choose_simd "0" (stride_to_solverparm !uivstride))
(choose_simd "0" (stride_to_solverparm !uovstride))
and init =
(declare_register_fcn name) ^
"{" ^
" X(kdft_register)(p, " ^ ename ^ ", &desc);\n" ^
"}\n"
in ((unparse tree) ^ "\n" ^
(if !Magic.standalone then "" else desc ^ init))
let main () =
begin
parse speclist usage;
print_string (generate (check_size ()));
end
let _ = main()

View File

@@ -0,0 +1,165 @@
(*
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*)
open Util
open Genutil
open C
let usage = "Usage: " ^ Sys.argv.(0) ^ " -n <number>"
let uistride = ref Stride_variable
let uostride = ref Stride_variable
let uivstride = ref Stride_variable
let uovstride = ref Stride_variable
let speclist = [
"-with-istride",
Arg.String(fun x -> uistride := arg_to_stride x),
" specialize for given input stride";
"-with-ostride",
Arg.String(fun x -> uostride := arg_to_stride x),
" specialize for given output stride";
"-with-ivstride",
Arg.String(fun x -> uivstride := arg_to_stride x),
" specialize for given input vector stride";
"-with-ovstride",
Arg.String(fun x -> uovstride := arg_to_stride x),
" specialize for given output vector stride"
]
let nonstandard_optimizer list_of_buddy_stores dag =
let sched = standard_scheduler dag in
let annot = Annotate.annotate list_of_buddy_stores sched in
let _ = dump_asched annot in
annot
let generate n =
let riarray = "xi"
and roarray = "xo"
and istride = "is"
and ostride = "os"
and i = "i"
and v = "v"
in
let sign = !Genutil.sign
and name = !Magic.codelet_name
and byvl x = choose_simd x (ctimes (CVar "VL", x)) in
let ename = expand_name name in
let vistride = either_stride (!uistride) (C.SVar istride)
and vostride = either_stride (!uostride) (C.SVar ostride)
in
let sivs = stride_to_string "ivs" !uivstride in
let sovs = stride_to_string "ovs" !uovstride in
let fft = Trig.dft_via_rdft in
let locations = unique_array_c n in
let input =
locative_array_c n
(C.array_subscript riarray vistride)
(C.array_subscript "BUG" vistride)
locations sivs in
let output = fft sign n (load_array_r n input) in
let oloc =
locative_array_c n
(C.array_subscript roarray vostride)
(C.array_subscript "BUG" vostride)
locations sovs in
let list_of_buddy_stores =
let k = !Simdmagic.store_multiple in
if (k > 1) then
if (n mod k == 0) then
List.map
(fun i -> List.map (fun j -> (fst (oloc (k * i + j)))) (iota k))
(iota (n / k))
else failwith "invalid n for -store-multiple"
else []
in
let odag = store_array_r n oloc output in
let annot = nonstandard_optimizer list_of_buddy_stores odag in
let body = Block (
[Decl ("INT", i);
Decl (C.constrealtypep, riarray);
Decl (C.realtypep, roarray)],
[Stmt_assign (CVar riarray, CVar (if (sign < 0) then "ri" else "ii"));
Stmt_assign (CVar roarray, CVar (if (sign < 0) then "ro" else "io"));
For (Expr_assign (CVar i, CVar v),
Binop (" > ", CVar i, Integer 0),
list_to_comma
[Expr_assign (CVar i, CPlus [CVar i; CUminus (byvl (Integer 1))]);
Expr_assign (CVar riarray, CPlus [CVar riarray;
byvl (CVar sivs)]);
Expr_assign (CVar roarray, CPlus [CVar roarray;
byvl (CVar sovs)]);
make_volatile_stride (2*n) (CVar istride);
make_volatile_stride (2*n) (CVar ostride)
],
Asch annot);
])
in
let tree =
Fcn ((if !Magic.standalone then "void" else "static void"), ename,
([Decl (C.constrealtypep, "ri");
Decl (C.constrealtypep, "ii");
Decl (C.realtypep, "ro");
Decl (C.realtypep, "io");
Decl (C.stridetype, istride);
Decl (C.stridetype, ostride);
Decl ("INT", v);
Decl ("INT", "ivs");
Decl ("INT", "ovs")]),
finalize_fcn body)
in
let desc =
Printf.sprintf
"static const kdft_desc desc = { %d, %s, %s, &GENUS, %s, %s, %s, %s };\n"
n (stringify name) (flops_of tree)
(stride_to_solverparm !uistride) (stride_to_solverparm !uostride)
(choose_simd "0" (stride_to_solverparm !uivstride))
(choose_simd "0" (stride_to_solverparm !uovstride))
and init =
(declare_register_fcn name) ^
"{" ^
" X(kdft_register)(p, " ^ ename ^ ", &desc);\n" ^
"}\n"
in ((unparse tree) ^ "\n" ^
(if !Magic.standalone then "" else desc ^ init))
let main () =
begin
Simdmagic.simd_mode := true;
parse speclist usage;
print_string (generate (check_size ()));
end
let _ = main()

View File

@@ -0,0 +1,167 @@
(*
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*)
open Util
open Genutil
open C
let usage = "Usage: " ^ Sys.argv.(0) ^ " -n <number>"
let urs = ref Stride_variable
let ucsr = ref Stride_variable
let ucsi = ref Stride_variable
let uivs = ref Stride_variable
let uovs = ref Stride_variable
let dftIII_flag = ref false
let speclist = [
"-with-rs",
Arg.String(fun x -> urs := arg_to_stride x),
" specialize for given real-array stride";
"-with-csr",
Arg.String(fun x -> ucsr := arg_to_stride x),
" specialize for given complex-array real stride";
"-with-csi",
Arg.String(fun x -> ucsi := arg_to_stride x),
" specialize for given complex-array imaginary stride";
"-with-ivs",
Arg.String(fun x -> uivs := arg_to_stride x),
" specialize for given input vector stride";
"-with-ovs",
Arg.String(fun x -> uovs := arg_to_stride x),
" specialize for given output vector stride";
"-dft-III",
Arg.Unit(fun () -> dftIII_flag := true),
" produce shifted dftIII-style codelets"
]
let hcdftIII sign n input =
let input' i =
if (i mod 2 == 0) then
Complex.zero
else
let i' = (i - 1) / 2 in
if (2 * i' < n - 1) then (input i')
else if (2 * i' == n - 1) then
Complex.real (input i')
else
Complex.conj (input (n - 1 - i'))
in Fft.dft sign (2 * n) input'
let generate n =
let ar0 = "R0" and ar1 = "R1" and acr = "Cr" and aci = "Ci"
and rs = "rs" and csr = "csr" and csi = "csi"
and i = "i" and v = "v"
and transform = if !dftIII_flag then hcdftIII else Trig.hdft
in
let sign = !Genutil.sign
and name = !Magic.codelet_name in
let vrs = either_stride (!urs) (C.SVar rs)
and vcsr = either_stride (!ucsr) (C.SVar csr)
and vcsi = either_stride (!ucsi) (C.SVar csi)
in
let sovs = stride_to_string "ovs" !uovs in
let sivs = stride_to_string "ivs" !uivs in
let locations = unique_array_c n in
let input =
locative_array_c n
(C.array_subscript acr vcsr)
(C.array_subscript aci vcsi)
locations sivs in
let output = transform sign n (load_array_hc n input) in
let oloce =
locative_array_c n
(C.array_subscript ar0 vrs)
(C.array_subscript "BUG" vrs)
locations sovs
and oloco =
locative_array_c n
(C.array_subscript ar1 vrs)
(C.array_subscript "BUG" vrs)
locations sovs in
let oloc i = if i mod 2 == 0 then oloce (i/2) else oloco ((i-1)/2) in
let odag = store_array_r n oloc output in
let annot = standard_optimizer odag in
let body = Block (
[Decl ("INT", i)],
[For (Expr_assign (CVar i, CVar v),
Binop (" > ", CVar i, Integer 0),
list_to_comma
[Expr_assign (CVar i, CPlus [CVar i; CUminus (Integer 1)]);
Expr_assign (CVar ar0, CPlus [CVar ar0; CVar sovs]);
Expr_assign (CVar ar1, CPlus [CVar ar1; CVar sovs]);
Expr_assign (CVar acr, CPlus [CVar acr; CVar sivs]);
Expr_assign (CVar aci, CPlus [CVar aci; CVar sivs]);
make_volatile_stride (4*n) (CVar rs);
make_volatile_stride (4*n) (CVar csr);
make_volatile_stride (4*n) (CVar csi)
],
Asch annot)
])
in
let tree =
Fcn ((if !Magic.standalone then "void" else "static void"), name,
([Decl (C.realtypep, ar0);
Decl (C.realtypep, ar1);
Decl (C.realtypep, acr);
Decl (C.realtypep, aci);
Decl (C.stridetype, rs);
Decl (C.stridetype, csr);
Decl (C.stridetype, csi);
Decl ("INT", v);
Decl ("INT", "ivs");
Decl ("INT", "ovs")]),
finalize_fcn body)
in let desc =
Printf.sprintf
"static const kr2c_desc desc = { %d, \"%s\", %s, &GENUS };\n\n"
n name (flops_of tree)
and init =
(declare_register_fcn name) ^
"{" ^
" X(kr2c_register)(p, " ^ name ^ ", &desc);\n" ^
"}\n"
in
(unparse tree) ^ "\n" ^ (if !Magic.standalone then "" else desc ^ init)
let main () =
begin
parse speclist usage;
print_string (generate (check_size ()));
end
let _ = main()

View File

@@ -0,0 +1,164 @@
(*
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*)
open Util
open Genutil
open C
let usage = "Usage: " ^ Sys.argv.(0) ^ " -n <number>"
let urs = ref Stride_variable
let ucsr = ref Stride_variable
let ucsi = ref Stride_variable
let uivs = ref Stride_variable
let uovs = ref Stride_variable
let dftII_flag = ref false
let speclist = [
"-with-rs",
Arg.String(fun x -> urs := arg_to_stride x),
" specialize for given real-array stride";
"-with-csr",
Arg.String(fun x -> ucsr := arg_to_stride x),
" specialize for given complex-array real stride";
"-with-csi",
Arg.String(fun x -> ucsi := arg_to_stride x),
" specialize for given complex-array imaginary stride";
"-with-ivs",
Arg.String(fun x -> uivs := arg_to_stride x),
" specialize for given input vector stride";
"-with-ovs",
Arg.String(fun x -> uovs := arg_to_stride x),
" specialize for given output vector stride";
"-dft-II",
Arg.Unit(fun () -> dftII_flag := true),
" produce shifted dftII-style codelets"
]
let rdftII sign n input =
let input' i = if i < n then input i else Complex.zero in
let f = Fft.dft sign (2 * n) input' in
let g i = f (2 * i + 1)
in fun i ->
if (i < n - i) then g i
else if (2 * i + 1 == n) then Complex.real (g i)
else Complex.zero
let generate n =
let ar0 = "R0" and ar1 = "R1" and acr = "Cr" and aci = "Ci"
and rs = "rs" and csr = "csr" and csi = "csi"
and i = "i" and v = "v"
and transform = if !dftII_flag then rdftII else Trig.rdft
in
let sign = !Genutil.sign
and name = !Magic.codelet_name in
let vrs = either_stride (!urs) (C.SVar rs)
and vcsr = either_stride (!ucsr) (C.SVar csr)
and vcsi = either_stride (!ucsi) (C.SVar csi)
in
let sovs = stride_to_string "ovs" !uovs in
let sivs = stride_to_string "ivs" !uivs in
let locations = unique_array_c n in
let inpute =
locative_array_c n
(C.array_subscript ar0 vrs)
(C.array_subscript "BUG" vrs)
locations sivs
and inputo =
locative_array_c n
(C.array_subscript ar1 vrs)
(C.array_subscript "BUG" vrs)
locations sivs
in
let input i = if i mod 2 == 0 then inpute (i/2) else inputo ((i-1)/2) in
let output = transform sign n (load_array_r n input) in
let oloc =
locative_array_c n
(C.array_subscript acr vcsr)
(C.array_subscript aci vcsi)
locations sovs in
let odag = store_array_hc n oloc output in
let annot = standard_optimizer odag in
let body = Block (
[Decl ("INT", i)],
[For (Expr_assign (CVar i, CVar v),
Binop (" > ", CVar i, Integer 0),
list_to_comma
[Expr_assign (CVar i, CPlus [CVar i; CUminus (Integer 1)]);
Expr_assign (CVar ar0, CPlus [CVar ar0; CVar sivs]);
Expr_assign (CVar ar1, CPlus [CVar ar1; CVar sivs]);
Expr_assign (CVar acr, CPlus [CVar acr; CVar sovs]);
Expr_assign (CVar aci, CPlus [CVar aci; CVar sovs]);
make_volatile_stride (4*n) (CVar rs);
make_volatile_stride (4*n) (CVar csr);
make_volatile_stride (4*n) (CVar csi)
],
Asch annot)
])
in
let tree =
Fcn ((if !Magic.standalone then "void" else "static void"), name,
([Decl (C.realtypep, ar0);
Decl (C.realtypep, ar1);
Decl (C.realtypep, acr);
Decl (C.realtypep, aci);
Decl (C.stridetype, rs);
Decl (C.stridetype, csr);
Decl (C.stridetype, csi);
Decl ("INT", v);
Decl ("INT", "ivs");
Decl ("INT", "ovs")]),
finalize_fcn body)
in let desc =
Printf.sprintf
"static const kr2c_desc desc = { %d, \"%s\", %s, &GENUS };\n\n"
n name (flops_of tree)
and init =
(declare_register_fcn name) ^
"{" ^
" X(kr2c_register)(p, " ^ name ^ ", &desc);\n" ^
"}\n"
in
(unparse tree) ^ "\n" ^ (if !Magic.standalone then "" else desc ^ init)
let main () =
begin
parse speclist usage;
print_string (generate (check_size ()));
end
let _ = main()

View File

@@ -0,0 +1,257 @@
(*
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*)
(* generation of trigonometric transforms *)
open Util
open Genutil
open C
let usage = "Usage: " ^ Sys.argv.(0) ^ " -n <number>"
let uistride = ref Stride_variable
let uostride = ref Stride_variable
let uivstride = ref Stride_variable
let uovstride = ref Stride_variable
type mode =
| RDFT
| HDFT
| DHT
| REDFT00
| REDFT10
| REDFT01
| REDFT11
| RODFT00
| RODFT10
| RODFT01
| RODFT11
| NONE
let mode = ref NONE
let normsqr = ref 1
let unitary = ref false
let noloop = ref false
let speclist = [
"-with-istride",
Arg.String(fun x -> uistride := arg_to_stride x),
" specialize for given input stride";
"-with-ostride",
Arg.String(fun x -> uostride := arg_to_stride x),
" specialize for given output stride";
"-with-ivstride",
Arg.String(fun x -> uivstride := arg_to_stride x),
" specialize for given input vector stride";
"-with-ovstride",
Arg.String(fun x -> uovstride := arg_to_stride x),
" specialize for given output vector stride";
"-rdft",
Arg.Unit(fun () -> mode := RDFT),
" generate a real DFT codelet";
"-hdft",
Arg.Unit(fun () -> mode := HDFT),
" generate a Hermitian DFT codelet";
"-dht",
Arg.Unit(fun () -> mode := DHT),
" generate a DHT codelet";
"-redft00",
Arg.Unit(fun () -> mode := REDFT00),
" generate a DCT-I codelet";
"-redft10",
Arg.Unit(fun () -> mode := REDFT10),
" generate a DCT-II codelet";
"-redft01",
Arg.Unit(fun () -> mode := REDFT01),
" generate a DCT-III codelet";
"-redft11",
Arg.Unit(fun () -> mode := REDFT11),
" generate a DCT-IV codelet";
"-rodft00",
Arg.Unit(fun () -> mode := RODFT00),
" generate a DST-I codelet";
"-rodft10",
Arg.Unit(fun () -> mode := RODFT10),
" generate a DST-II codelet";
"-rodft01",
Arg.Unit(fun () -> mode := RODFT01),
" generate a DST-III codelet";
"-rodft11",
Arg.Unit(fun () -> mode := RODFT11),
" generate a DST-IV codelet";
"-normalization",
Arg.String(fun x -> let ix = int_of_string x in normsqr := ix * ix),
" normalization integer to divide by";
"-normsqr",
Arg.String(fun x -> normsqr := int_of_string x),
" integer square of normalization to divide by";
"-unitary",
Arg.Unit(fun () -> unitary := true),
" unitary normalization (up overall scale factor)";
"-noloop",
Arg.Unit(fun () -> noloop := true),
" no vector loop";
]
let sqrt_half = Complex.inverse_int_sqrt 2
let sqrt_two = Complex.int_sqrt 2
let rescale sc s1 s2 input i =
if ((i == s1 || i == s2) && !unitary) then
Complex.times (input i) sc
else
input i
let generate n mode =
let iarray = "I"
and oarray = "O"
and istride = "is"
and ostride = "os"
and i = "i"
and v = "v"
in
let sign = !Genutil.sign
and name = !Magic.codelet_name in
let vistride = either_stride (!uistride) (C.SVar istride)
and vostride = either_stride (!uostride) (C.SVar ostride)
in
let sovs = stride_to_string "ovs" !uovstride in
let sivs = stride_to_string "ivs" !uivstride in
let (transform, load_input, store_output, si1,si2,so1,so2) = match mode with
| RDFT -> Trig.rdft sign, load_array_r, store_array_hc, -1,-1,-1,-1
| HDFT -> Trig.hdft sign, load_array_c, store_array_r, -1,-1,-1,-1 (* TODO *)
| DHT -> Trig.dht 1, load_array_r, store_array_r, -1,-1,-1,-1
| REDFT00 -> Trig.dctI, load_array_r, store_array_r, 0,n-1,0,n-1
| REDFT10 -> Trig.dctII, load_array_r, store_array_r, -1,-1,0,-1
| REDFT01 -> Trig.dctIII, load_array_r, store_array_r, 0,-1,-1,-1
| REDFT11 -> Trig.dctIV, load_array_r, store_array_r, -1,-1,-1,-1
| RODFT00 -> Trig.dstI, load_array_r, store_array_r, -1,-1,-1,-1
| RODFT10 -> Trig.dstII, load_array_r, store_array_r, -1,-1,n-1,-1
| RODFT01 -> Trig.dstIII, load_array_r, store_array_r, n-1,-1,-1,-1
| RODFT11 -> Trig.dstIV, load_array_r, store_array_r, -1,-1,-1,-1
| _ -> failwith "must specify transform kind"
in
let locations = unique_array_c n in
let input = locative_array_c n
(C.array_subscript iarray vistride)
(C.array_subscript "BUG" vistride)
locations sivs in
let output = rescale sqrt_half so1 so2
((Complex.times (Complex.inverse_int_sqrt !normsqr))
@@ (transform n (rescale sqrt_two si1 si2 (load_array_c n input)))) in
let oloc =
locative_array_c n
(C.array_subscript oarray vostride)
(C.array_subscript "BUG" vostride)
locations sovs in
let odag = store_output n oloc output in
let annot = standard_optimizer odag in
let body = if !noloop then Block([], [Asch annot]) else Block (
[Decl ("INT", i)],
[For (Expr_assign (CVar i, CVar v),
Binop (" > ", CVar i, Integer 0),
list_to_comma
[Expr_assign (CVar i, CPlus [CVar i; CUminus (Integer 1)]);
Expr_assign (CVar iarray, CPlus [CVar iarray; CVar sivs]);
Expr_assign (CVar oarray, CPlus [CVar oarray; CVar sovs]);
make_volatile_stride (2*n) (CVar istride);
make_volatile_stride (2*n) (CVar ostride)
],
Asch annot)
])
in
let tree =
Fcn ((if !Magic.standalone then "void" else "static void"), name,
([Decl (C.constrealtypep, iarray);
Decl (C.realtypep, oarray)]
@ (if stride_fixed !uistride then []
else [Decl (C.stridetype, istride)])
@ (if stride_fixed !uostride then []
else [Decl (C.stridetype, ostride)])
@ (if !noloop then [] else
[Decl ("INT", v)]
@ (if stride_fixed !uivstride then []
else [Decl ("INT", "ivs")])
@ (if stride_fixed !uovstride then []
else [Decl ("INT", "ovs")]))),
finalize_fcn body)
in let desc =
Printf.sprintf
"static const kr2r_desc desc = { %d, \"%s\", %s, &GENUS, %s };\n\n"
n name (flops_of tree)
(match mode with
| RDFT -> "RDFT00"
| HDFT -> "HDFT00"
| DHT -> "DHT"
| REDFT00 -> "REDFT00"
| REDFT10 -> "REDFT10"
| REDFT01 -> "REDFT01"
| REDFT11 -> "REDFT11"
| RODFT00 -> "RODFT00"
| RODFT10 -> "RODFT10"
| RODFT01 -> "RODFT01"
| RODFT11 -> "RODFT11"
| _ -> failwith "must specify a transform kind")
and init =
(declare_register_fcn name) ^
"{" ^
" X(kr2r_register)(p, " ^ name ^ ", &desc);\n" ^
"}\n"
in
(unparse tree) ^ "\n" ^ (if !Magic.standalone then "" else desc ^ init)
let main () =
begin
parse speclist usage;
print_string (generate (check_size ()) !mode);
end
let _ = main()

View File

@@ -0,0 +1,161 @@
(*
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*)
open Util
open Genutil
open C
type ditdif = DIT | DIF
let ditdif = ref DIT
let usage = "Usage: " ^ Sys.argv.(0) ^ " -n <number> [ -dit | -dif ]"
let urs = ref Stride_variable
let ums = ref Stride_variable
let speclist = [
"-dit",
Arg.Unit(fun () -> ditdif := DIT),
" generate a DIT codelet";
"-dif",
Arg.Unit(fun () -> ditdif := DIF),
" generate a DIF codelet";
"-with-rs",
Arg.String(fun x -> urs := arg_to_stride x),
" specialize for given i/o stride";
"-with-ms",
Arg.String(fun x -> ums := arg_to_stride x),
" specialize for given ms"
]
let generate n =
let rioarray = "ri"
and iioarray = "ii"
and rs = "rs"
and twarray = "W"
and m = "m" and mb = "mb" and me = "me" and ms = "ms" in
let sign = !Genutil.sign
and name = !Magic.codelet_name
and byvl x = choose_simd x (ctimes (CVar "(2 * VL)", x)) in
let ename = expand_name name in
let (bytwiddle, num_twiddles, twdesc) = Twiddle.twiddle_policy 0 false in
let nt = num_twiddles n in
let byw = bytwiddle n sign (twiddle_array nt twarray) in
let vrs = either_stride (!urs) (C.SVar rs) in
let sms = stride_to_string "ms" !ums in
let locations = unique_array_c n in
let iloc =
locative_array_c n
(C.array_subscript rioarray vrs)
(C.array_subscript iioarray vrs)
locations sms
and oloc =
locative_array_c n
(C.array_subscript rioarray vrs)
(C.array_subscript iioarray vrs)
locations sms
in
let liloc = load_array_c n iloc in
let output =
match !ditdif with
| DIT -> array n (Fft.dft sign n (byw liloc))
| DIF -> array n (byw (Fft.dft sign n liloc))
in
let odag = store_array_c n oloc output in
let annot = standard_optimizer odag in
let vm = CVar m and vmb = CVar mb and vme = CVar me in
let body = Block (
[Decl ("INT", m)],
[For (list_to_comma
[Expr_assign (vm, vmb);
Expr_assign (CVar twarray,
CPlus [CVar twarray;
ctimes (vmb, Integer nt)])],
Binop (" < ", vm, vme),
list_to_comma
[Expr_assign (vm, CPlus [vm; byvl (Integer 1)]);
Expr_assign (CVar rioarray, CPlus [CVar rioarray;
byvl (CVar sms)]);
Expr_assign (CVar iioarray, CPlus [CVar iioarray;
byvl (CVar sms)]);
Expr_assign (CVar twarray, CPlus [CVar twarray;
byvl (Integer nt)]);
make_volatile_stride (2*n) (CVar rs)
],
Asch annot)])
in
let tree =
Fcn (((if !Magic.standalone then "" else "static ") ^ "void"),
ename,
[Decl (C.realtypep, rioarray);
Decl (C.realtypep, iioarray);
Decl (C.constrealtypep, twarray);
Decl (C.stridetype, rs);
Decl ("INT", mb);
Decl ("INT", me);
Decl ("INT", ms)],
finalize_fcn body)
in
let twinstr =
Printf.sprintf "static const tw_instr twinstr[] = %s;\n\n"
(twinstr_to_string "(2 * VL)" (twdesc n))
and desc =
Printf.sprintf
"static const ct_desc desc = {%d, %s, twinstr, &GENUS, %s, %s, %s, %s};\n\n"
n (stringify name) (flops_of tree)
(stride_to_solverparm !urs) "0"
(stride_to_solverparm !ums)
and register =
match !ditdif with
| DIT -> "X(kdft_dit_register)"
| DIF -> "X(kdft_dif_register)"
in
let init =
"\n" ^
twinstr ^
desc ^
(declare_register_fcn name) ^
(Printf.sprintf "{\n%s(p, %s, &desc);\n}" register ename)
in
(unparse tree) ^ "\n" ^
(if !Magic.standalone then "" else init)
let main () =
begin
parse (speclist @ Twiddle.speclist) usage;
print_string (generate (check_size ()));
end
let _ = main()

View File

@@ -0,0 +1,165 @@
(*
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*)
open Util
open Genutil
open C
type ditdif = DIT | DIF
let ditdif = ref DIT
let usage = "Usage: " ^ Sys.argv.(0) ^ " -n <number> [ -dit | -dif ]"
let urs = ref Stride_variable
let ums = ref Stride_variable
let speclist = [
"-dit",
Arg.Unit(fun () -> ditdif := DIT),
" generate a DIT codelet";
"-dif",
Arg.Unit(fun () -> ditdif := DIF),
" generate a DIF codelet";
"-with-rs",
Arg.String(fun x -> urs := arg_to_stride x),
" specialize for given i/o stride";
"-with-ms",
Arg.String(fun x -> ums := arg_to_stride x),
" specialize for given ms"
]
let generate n =
let rioarray = "x"
and rs = "rs"
and twarray = "W"
and m = "m" and mb = "mb" and me = "me" and ms = "ms" in
let sign = !Genutil.sign
and name = !Magic.codelet_name
and byvl x = choose_simd x (ctimes (CVar "VL", x))
and bytwvl x = choose_simd x (ctimes (CVar "TWVL", x))
and bytwvl_vl x = choose_simd x (ctimes (CVar "(TWVL/VL)", x)) in
let ename = expand_name name in
let (bytwiddle, num_twiddles, twdesc) = Twiddle.twiddle_policy 0 true in
let nt = num_twiddles n in
let byw = bytwiddle n sign (twiddle_array nt twarray) in
let vrs = either_stride (!urs) (C.SVar rs) in
let sms = stride_to_string "ms" !ums in
let locations = unique_array_c n in
let iloc =
locative_array_c n
(C.array_subscript rioarray vrs)
(C.array_subscript "BUG" vrs)
locations sms
and oloc =
locative_array_c n
(C.array_subscript rioarray vrs)
(C.array_subscript "BUG" vrs)
locations sms
in
let liloc = load_array_r n iloc in
let fft = Trig.dft_via_rdft in
let output =
match !ditdif with
| DIT -> array n (fft sign n (byw liloc))
| DIF -> array n (byw (fft sign n liloc))
in
let odag = store_array_r n oloc output in
let annot = standard_optimizer odag in
let vm = CVar m and vmb = CVar mb and vme = CVar me in
let body = Block (
[Decl ("INT", m);
Decl (C.realtypep, rioarray)],
[Stmt_assign (CVar rioarray,
CVar (if (sign < 0) then "ri" else "ii"));
For (list_to_comma
[Expr_assign (vm, vmb);
Expr_assign (CVar twarray,
CPlus [CVar twarray;
ctimes (vmb,
bytwvl_vl (Integer nt))])],
Binop (" < ", vm, vme),
list_to_comma
[Expr_assign (vm, CPlus [vm; byvl (Integer 1)]);
Expr_assign (CVar rioarray, CPlus [CVar rioarray;
byvl (CVar sms)]);
Expr_assign (CVar twarray, CPlus [CVar twarray;
bytwvl (Integer nt)]);
make_volatile_stride n (CVar rs)
],
Asch annot)])
in
let tree =
Fcn (((if !Magic.standalone then "" else "static ") ^ "void"),
ename,
[Decl (C.realtypep, "ri");
Decl (C.realtypep, "ii");
Decl (C.constrealtypep, twarray);
Decl (C.stridetype, rs);
Decl ("INT", mb);
Decl ("INT", me);
Decl ("INT", ms)],
finalize_fcn body)
in
let twinstr =
Printf.sprintf "static const tw_instr twinstr[] = %s;\n\n"
(twinstr_to_string "VL" (twdesc n))
and desc =
Printf.sprintf
"static const ct_desc desc = {%d, %s, twinstr, &GENUS, %s, %s, %s, %s};\n\n"
n (stringify name) (flops_of tree)
(stride_to_solverparm !urs) "0"
(stride_to_solverparm !ums)
and register =
match !ditdif with
| DIT -> "X(kdft_dit_register)"
| DIF -> "X(kdft_dif_register)"
in
let init =
"\n" ^
twinstr ^
desc ^
(declare_register_fcn name) ^
(Printf.sprintf "{\n%s(p, %s, &desc);\n}" register ename)
in
(unparse tree) ^ "\n" ^ (if !Magic.standalone then "" else init)
let main () =
begin
Simdmagic.simd_mode := true;
parse (speclist @ Twiddle.speclist) usage;
print_string (generate (check_size ()));
end
let _ = main()

View File

@@ -0,0 +1,176 @@
(*
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*)
open Util
open Genutil
open C
type ditdif = DIT | DIF
let ditdif = ref DIT
let usage = "Usage: " ^ Sys.argv.(0) ^ " -n <number> [ -dit | -dif ]"
let reload_twiddle = ref false
let urs = ref Stride_variable
let uvs = ref Stride_variable
let ums = ref Stride_variable
let speclist = [
"-dit",
Arg.Unit(fun () -> ditdif := DIT),
" generate a DIT codelet";
"-dif",
Arg.Unit(fun () -> ditdif := DIF),
" generate a DIF codelet";
"-reload-twiddle",
Arg.Unit(fun () -> reload_twiddle := true),
" do not collect common twiddle factors";
"-with-rs",
Arg.String(fun x -> urs := arg_to_stride x),
" specialize for given input stride";
"-with-vs",
Arg.String(fun x -> uvs := arg_to_stride x),
" specialize for given vector stride";
"-with-ms",
Arg.String(fun x -> ums := arg_to_stride x),
" specialize for given ms"
]
let generate n =
let rioarray = "rio"
and iioarray = "iio"
and rs = "rs" and vs = "vs"
and twarray = "W"
and m = "m" and mb = "mb" and me = "me" and ms = "ms" in
let sign = !Genutil.sign
and name = !Magic.codelet_name in
let (bytwiddle, num_twiddles, twdesc) = Twiddle.twiddle_policy 0 false in
let nt = num_twiddles n in
let svs = either_stride (!uvs) (C.SVar vs)
and srs = either_stride (!urs) (C.SVar rs) in
let byw =
if !reload_twiddle then
array n (fun v -> bytwiddle n sign (twiddle_array nt twarray))
else
let a = bytwiddle n sign (twiddle_array nt twarray)
in fun v -> a
in
let locations = unique_v_array_c n n in
let ioi =
locative_v_array_c n n
(C.varray_subscript rioarray svs srs)
(C.varray_subscript iioarray svs srs)
locations "BUG"
and ioo =
locative_v_array_c n n
(C.varray_subscript rioarray svs srs)
(C.varray_subscript iioarray svs srs)
locations "BUG"
in
let lioi = load_v_array_c n n ioi in
let output =
match !ditdif with
| DIT -> array n (fun v -> Fft.dft sign n (byw v (lioi v)))
| DIF -> array n (fun v -> byw v (Fft.dft sign n (lioi v)))
in
let odag = store_v_array_c n n ioo (transpose output) in
let annot = standard_optimizer odag in
let vm = CVar m and vmb = CVar mb and vme = CVar me in
let body = Block (
[Decl ("INT", m)],
[For (list_to_comma
[Expr_assign (vm, vmb);
Expr_assign (CVar twarray,
CPlus [CVar twarray;
ctimes (vmb, Integer nt)])],
Binop (" < ", vm, vme),
list_to_comma
[Expr_assign (vm, CPlus [vm; Integer 1]);
Expr_assign (CVar rioarray, CPlus [CVar rioarray; CVar ms]);
Expr_assign (CVar iioarray, CPlus [CVar iioarray; CVar ms]);
Expr_assign (CVar twarray, CPlus [CVar twarray; Integer nt]);
make_volatile_stride (2*n) (CVar rs);
make_volatile_stride (2*0) (CVar vs)
],
Asch annot)]) in
let tree =
Fcn (("static void"), name,
[Decl (C.realtypep, rioarray);
Decl (C.realtypep, iioarray);
Decl (C.constrealtypep, twarray);
Decl (C.stridetype, rs);
Decl (C.stridetype, vs);
Decl ("INT", mb);
Decl ("INT", me);
Decl ("INT", ms)],
finalize_fcn body)
in
let twinstr =
Printf.sprintf "static const tw_instr twinstr[] = %s;\n\n"
(Twiddle.twinstr_to_c_string (twdesc n))
and desc =
Printf.sprintf
"static const ct_desc desc = {%d, \"%s\", twinstr, &GENUS, %s, %s, %s, %s};\n\n"
n name (flops_of tree)
(stride_to_solverparm !urs) (stride_to_solverparm !uvs)
(stride_to_solverparm !ums)
and register =
match !ditdif with
| DIT -> "X(kdft_ditsq_register)"
| DIF -> "X(kdft_difsq_register)"
in
let init =
"\n" ^
twinstr ^
desc ^
(declare_register_fcn name) ^
(Printf.sprintf "{\n%s(p, %s, &desc);\n}" register name)
in
(unparse tree) ^ "\n" ^ init
let main () =
begin
parse (speclist @ Twiddle.speclist) usage;
print_string (generate (check_size ()));
end
let _ = main()

View File

@@ -0,0 +1,187 @@
(*
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*)
open Util
open Genutil
open C
type ditdif = DIT | DIF
let ditdif = ref DIT
let usage = "Usage: " ^ Sys.argv.(0) ^ " -n <number> [ -dit | -dif ]"
let reload_twiddle = ref false
let urs = ref Stride_variable
let uvs = ref Stride_variable
let ums = ref Stride_variable
let speclist = [
"-dit",
Arg.Unit(fun () -> ditdif := DIT),
" generate a DIT codelet";
"-dif",
Arg.Unit(fun () -> ditdif := DIF),
" generate a DIF codelet";
"-reload-twiddle",
Arg.Unit(fun () -> reload_twiddle := true),
" do not collect common twiddle factors";
"-with-rs",
Arg.String(fun x -> urs := arg_to_stride x),
" specialize for given input stride";
"-with-vs",
Arg.String(fun x -> uvs := arg_to_stride x),
" specialize for given vector stride";
"-with-ms",
Arg.String(fun x -> ums := arg_to_stride x),
" specialize for given ms"
]
let generate n =
let rioarray = "x"
and rs = "rs" and vs = "vs"
and twarray = "W"
and m = "m" and mb = "mb" and me = "me" and ms = "ms" in
let sign = !Genutil.sign
and name = !Magic.codelet_name
and byvl x = choose_simd x (ctimes (CVar "VL", x))
and bytwvl x = choose_simd x (ctimes (CVar "TWVL", x))
and bytwvl_vl x = choose_simd x (ctimes (CVar "(TWVL/VL)", x)) in
let ename = expand_name name in
let (bytwiddle, num_twiddles, twdesc) = Twiddle.twiddle_policy 0 true in
let nt = num_twiddles n in
let svs = either_stride (!uvs) (C.SVar vs)
and srs = either_stride (!urs) (C.SVar rs) in
let sms = stride_to_string "ms" !ums in
let byw =
if !reload_twiddle then
array n (fun v -> bytwiddle n sign (twiddle_array nt twarray))
else
let a = bytwiddle n sign (twiddle_array nt twarray)
in fun v -> a
in
let locations = unique_v_array_c n n in
let ioi =
locative_v_array_c n n
(C.varray_subscript rioarray svs srs)
(C.varray_subscript "BUG" svs srs)
locations sms
and ioo =
locative_v_array_c n n
(C.varray_subscript rioarray svs srs)
(C.varray_subscript "BUG" svs srs)
locations sms
in
let lioi = load_v_array_c n n ioi in
let fft = Trig.dft_via_rdft in
let output =
match !ditdif with
| DIT -> array n (fun v -> fft sign n (byw v (lioi v)))
| DIF -> array n (fun v -> byw v (fft sign n (lioi v)))
in
let odag = store_v_array_c n n ioo (transpose output) in
let annot = standard_optimizer odag in
let vm = CVar m and vmb = CVar mb and vme = CVar me in
let body = Block (
[Decl ("INT", m);
Decl (C.realtypep, rioarray)],
[Stmt_assign (CVar rioarray,
CVar (if (sign < 0) then "ri" else "ii"));
For (list_to_comma
[Expr_assign (vm, vmb);
Expr_assign (CVar twarray,
CPlus [CVar twarray;
ctimes (vmb,
bytwvl_vl (Integer nt))])],
Binop (" < ", vm, vme),
list_to_comma
[Expr_assign (vm, CPlus [vm; byvl (Integer 1)]);
Expr_assign (CVar rioarray, CPlus [CVar rioarray;
byvl (CVar sms)]);
Expr_assign (CVar twarray, CPlus [CVar twarray;
bytwvl (Integer nt)]);
make_volatile_stride (2*n) (CVar rs);
make_volatile_stride (2*n) (CVar vs)
],
Asch annot)]) in
let tree =
Fcn (("static void"), ename,
[Decl (C.realtypep, "ri");
Decl (C.realtypep, "ii");
Decl (C.constrealtypep, twarray);
Decl (C.stridetype, rs);
Decl (C.stridetype, vs);
Decl ("INT", mb);
Decl ("INT", me);
Decl ("INT", ms)],
finalize_fcn body)
in
let twinstr =
Printf.sprintf "static const tw_instr twinstr[] = %s;\n\n"
(twinstr_to_string "VL" (twdesc n))
and desc =
Printf.sprintf
"static const ct_desc desc = {%d, %s, twinstr, &GENUS, %s, %s, %s, %s};\n\n"
n (stringify name) (flops_of tree)
(stride_to_solverparm !urs)
(stride_to_solverparm !uvs)
(stride_to_solverparm !ums)
and register =
match !ditdif with
| DIT -> "X(kdft_ditsq_register)"
| DIF -> "X(kdft_difsq_register)"
in
let init =
"\n" ^
twinstr ^
desc ^
(declare_register_fcn name) ^
(Printf.sprintf "{\n%s(p, %s, &desc);\n}" register ename)
in
(unparse tree) ^ "\n" ^ init
let main () =
begin
parse (speclist @ Twiddle.speclist) usage;
print_string (generate (check_size ()));
end
let _ = main()

View File

@@ -0,0 +1,328 @@
(*
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*)
(* utilities common to all generators *)
open Util
let choose_simd a b = if !Simdmagic.simd_mode then b else a
let unique_array n = array n (fun _ -> Unique.make ())
let unique_array_c n =
array n (fun _ ->
(Unique.make (), Unique.make ()))
let unique_v_array_c veclen n =
array veclen (fun _ ->
unique_array_c n)
let locative_array_c n rarr iarr loc vs =
array n (fun i ->
let klass = Unique.make () in
let (rloc, iloc) = loc i in
(Variable.make_locative rloc klass rarr i vs,
Variable.make_locative iloc klass iarr i vs))
let locative_v_array_c veclen n rarr iarr loc vs =
array veclen (fun v ->
array n (fun i ->
let klass = Unique.make () in
let (rloc, iloc) = loc v i in
(Variable.make_locative rloc klass (rarr v) i vs,
Variable.make_locative iloc klass (iarr v) i vs)))
let temporary_array n =
array n (fun i -> Variable.make_temporary ())
let temporary_array_c n =
let tmpr = temporary_array n
and tmpi = temporary_array n
in
array n (fun i -> (tmpr i, tmpi i))
let temporary_v_array_c veclen n =
array veclen (fun v -> temporary_array_c n)
let temporary_array_c n =
let tmpr = temporary_array n
and tmpi = temporary_array n
in
array n (fun i -> (tmpr i, tmpi i))
let load_c (vr, vi) = Complex.make (Expr.Load vr, Expr.Load vi)
let load_r (vr, vi) = Complex.make (Expr.Load vr, Expr.Num (Number.zero))
let twiddle_array nt w =
array (nt/2) (fun i ->
let stride = choose_simd (C.SInteger 1) (C.SConst "TWVL")
and klass = Unique.make () in
let (refr, refi) = (C.array_subscript w stride (2 * i),
C.array_subscript w stride (2 * i + 1))
in
let (kr, ki) = (Variable.make_constant klass refr,
Variable.make_constant klass refi)
in
load_c (kr, ki))
let load_array_c n var = array n (fun i -> load_c (var i))
let load_array_r n var = array n (fun i -> load_r (var i))
let load_array_hc n var =
array n (fun i ->
if (i < n - i) then
load_c (var i)
else if (i > n - i) then
Complex.times Complex.i (load_c (var (n - i)))
else
load_r (var i))
let load_v_array_c veclen n var =
array veclen (fun v -> load_array_c n (var v))
let store_c (vr, vi) x = [Complex.store_real vr x; Complex.store_imag vi x]
let store_r (vr, vi) x = Complex.store_real vr x
let store_i (vr, vi) x = Complex.store_imag vi x
let assign_array_c n dst src =
List.flatten
(rmap (iota n)
(fun i ->
let (ar, ai) = Complex.assign (dst i) (src i)
in [ar; ai]))
let assign_v_array_c veclen n dst src =
List.flatten
(rmap (iota veclen)
(fun v ->
assign_array_c n (dst v) (src v)))
let vassign_v_array_c veclen n dst src =
List.flatten
(rmap (iota n) (fun i ->
List.flatten
(rmap (iota veclen)
(fun v ->
let (ar, ai) = Complex.assign (dst v i) (src v i)
in [ar; ai]))))
let store_array_r n dst src =
rmap (iota n)
(fun i -> store_r (dst i) (src i))
let store_array_c n dst src =
List.flatten
(rmap (iota n)
(fun i -> store_c (dst i) (src i)))
let store_array_hc n dst src =
List.flatten
(rmap (iota n)
(fun i ->
if (i < n - i) then
store_c (dst i) (src i)
else if (i > n - i) then
[]
else
[store_r (dst i) (Complex.real (src i))]))
let store_v_array_c veclen n dst src =
List.flatten
(rmap (iota veclen)
(fun v ->
store_array_c n (dst v) (src v)))
let elementwise f n a = array n (fun i -> f (a i))
let conj_array_c = elementwise Complex.conj
let real_array_c = elementwise Complex.real
let imag_array_c = elementwise Complex.imag
let elementwise_v f veclen n a =
array veclen (fun v ->
array n (fun i -> f (a v i)))
let conj_v_array_c = elementwise_v Complex.conj
let real_v_array_c = elementwise_v Complex.real
let imag_v_array_c = elementwise_v Complex.imag
let transpose f i j = f j i
let symmetrize f i j = if i <= j then f i j else f j i
(* utilities for command-line parsing *)
let standard_arg_parse_fail _ = failwith "too many arguments"
let dump_dag alist =
let fnam = !Magic.dag_dump_file in
if (String.length fnam > 0) then
let ochan = open_out fnam in
begin
To_alist.dump (output_string ochan) alist;
close_out ochan;
end
let dump_alist alist =
let fnam = !Magic.alist_dump_file in
if (String.length fnam > 0) then
let ochan = open_out fnam in
begin
Expr.dump (output_string ochan) alist;
close_out ochan;
end
let dump_asched asched =
let fnam = !Magic.asched_dump_file in
if (String.length fnam > 0) then
let ochan = open_out fnam in
begin
Annotate.dump (output_string ochan) asched;
close_out ochan;
end
(* utilities for optimization *)
let standard_scheduler dag =
let optim = Algsimp.algsimp dag in
let alist = To_alist.to_assignments optim in
let _ = dump_alist alist in
let _ = dump_dag alist in
if !Magic.precompute_twiddles then
Schedule.isolate_precomputations_and_schedule alist
else
Schedule.schedule alist
let standard_optimizer dag =
let sched = standard_scheduler dag in
let annot = Annotate.annotate [] sched in
let _ = dump_asched annot in
annot
let size = ref None
let sign = ref (-1)
let speclist = [
"-n", Arg.Int(fun i -> size := Some i), " generate a codelet of size <n>";
"-sign",
Arg.Int(fun i ->
if (i > 0) then
sign := 1
else
sign := (-1)),
" sign of transform";
]
let check_size () =
match !size with
| Some i -> i
| None -> failwith "must specify -n"
let expand_name name = if name = "" then "noname" else name
let declare_register_fcn name =
if name = "" then
"void NAME(planner *p)\n"
else
"void " ^ (choose_simd "X" "XSIMD") ^
"(codelet_" ^ name ^ ")(planner *p)\n"
let stringify name =
if name = "" then "STRINGIZE(NAME)" else
choose_simd ("\"" ^ name ^ "\"")
("XSIMD_STRING(\"" ^ name ^ "\")")
let parse user_speclist usage =
Arg.parse
(user_speclist @ speclist @ Magic.speclist @ Simdmagic.speclist)
standard_arg_parse_fail
usage
let rec list_to_c = function
[] -> ""
| [a] -> (string_of_int a)
| a :: b -> (string_of_int a) ^ ", " ^ (list_to_c b)
let rec list_to_comma = function
| [a; b] -> C.Comma (a, b)
| a :: b -> C.Comma (a, list_to_comma b)
| _ -> failwith "list_to_comma"
type stride = Stride_variable | Fixed_int of int | Fixed_string of string
let either_stride a b =
match a with
Fixed_int x -> C.SInteger x
| Fixed_string x -> C.SConst x
| _ -> b
let stride_fixed = function
Stride_variable -> false
| _ -> true
let arg_to_stride s =
try
Fixed_int (int_of_string s)
with Failure "int_of_string" ->
Fixed_string s
let stride_to_solverparm = function
Stride_variable -> "0"
| Fixed_int x -> string_of_int x
| Fixed_string x -> x
let stride_to_string s = function
Stride_variable -> s
| Fixed_int x -> string_of_int x
| Fixed_string x -> x
(* output the command line *)
let cmdline () =
List.fold_right (fun a b -> a ^ " " ^ b) (Array.to_list Sys.argv) ""
let unparse tree =
"/* Generated by: " ^ (cmdline ()) ^ "*/\n\n" ^
(C.print_cost tree) ^
(if String.length !Magic.inklude > 0
then
(Printf.sprintf "#include \"%s\"\n\n" !Magic.inklude)
else "") ^
(if !Simdmagic.simd_mode then
Simd.unparse_function tree
else
C.unparse_function tree)
let finalize_fcn ast =
let mergedecls = function
C.Block (d1, [C.Block (d2, s)]) -> C.Block (d1 @ d2, s)
| x -> x
and extract_constants =
if !Simdmagic.simd_mode then
Simd.extract_constants
else
C.extract_constants
in mergedecls (C.Block (extract_constants ast, [ast; C.Simd_leavefun]))
let twinstr_to_string vl x =
if !Simdmagic.simd_mode then
Twiddle.twinstr_to_simd_string vl x
else
Twiddle.twinstr_to_c_string x
let make_volatile_stride n x =
C.CCall ("MAKE_VOLATILE_STRIDE", C.Comma((C.Integer n), x))

View File

@@ -0,0 +1,71 @@
(*
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*)
(*
* The LittleSimplifier module implements a subset of the simplifications
* of the AlgSimp module. These simplifications can be executed
* quickly here, while they would take a long time using the heavy
* machinery of AlgSimp.
*
* For example, 0 * x is simplified to 0 tout court by the LittleSimplifier.
* On the other hand, AlgSimp would first simplify x, generating lots
* of common subexpressions, storing them in a table etc, just to
* discard all the work later. Similarly, the LittleSimplifier
* reduces the constant FFT in Rader's algorithm to a constant sequence.
*)
open Expr
let rec makeNum = function
| n -> Num n
and makeUminus = function
| Uminus a -> a
| Num a -> makeNum (Number.negate a)
| a -> Uminus a
and makeTimes = function
| (Num a, Num b) -> makeNum (Number.mul a b)
| (Num a, Times (Num b, c)) -> makeTimes (makeNum (Number.mul a b), c)
| (Num a, b) when Number.is_zero a -> makeNum (Number.zero)
| (Num a, b) when Number.is_one a -> b
| (Num a, b) when Number.is_mone a -> makeUminus b
| (Num a, Uminus b) -> Times (makeUminus (Num a), b)
| (a, (Num b as b')) -> makeTimes (b', a)
| (a, b) -> Times (a, b)
and makePlus l =
let rec reduceSum x = match x with
[] -> []
| [Num a] -> if Number.is_zero a then [] else x
| (Num a) :: (Num b) :: c ->
reduceSum ((makeNum (Number.add a b)) :: c)
| ((Num _) as a') :: b :: c -> b :: reduceSum (a' :: c)
| a :: s -> a :: reduceSum s
in match reduceSum l with
[] -> makeNum (Number.zero)
| [a] -> a
| [a; b] when a == b -> makeTimes (Num Number.two, a)
| [Times (Num a, b); Times (Num c, d)] when b == d ->
makeTimes (makePlus [Num a; Num c], b)
| a -> Plus a

View File

@@ -0,0 +1,25 @@
(*
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*)
val makeNum : Number.number -> Expr.expr
val makeUminus : Expr.expr -> Expr.expr
val makeTimes : Expr.expr * Expr.expr -> Expr.expr
val makePlus : Expr.expr list -> Expr.expr

161
fftw-3.3.10/genfft/magic.ml Normal file
View File

@@ -0,0 +1,161 @@
(*
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*)
(* magic parameters *)
let verbose = ref false
let vneg = ref false
let karatsuba_min = ref 15
let karatsuba_variant = ref 2
let circular_min = ref 64
let rader_min = ref 13
let rader_list = ref [5]
let alternate_convolution = ref 17
let threemult = ref false
let inline_single = ref true
let inline_loads = ref false
let inline_loads_constants = ref false
let inline_constants = ref true
let trivial_stores = ref false
let locations_are_special = ref false
let strength_reduce_mul = ref false
let number_of_variables = ref 4
let codelet_name = ref "unnamed"
let randomized_cse = ref true
let dif_split_radix = ref false
let enable_fma = ref false
let deep_collect_depth = ref 1
let schedule_type = ref 0
let compact = ref false
let dag_dump_file = ref ""
let alist_dump_file = ref ""
let asched_dump_file = ref ""
let lisp_syntax = ref false
let network_transposition = ref true
let inklude = ref ""
let generic_arith = ref false
let reorder_insns = ref false
let reorder_loads = ref false
let reorder_stores = ref false
let precompute_twiddles = ref false
let newsplit = ref false
let standalone = ref false
let pipeline_latency = ref 0
let schedule_for_pipeline = ref false
let generate_bytw = ref true
(* command-line parser for magic parameters *)
let undocumented = " Undocumented voodoo parameter"
let set_bool var = Arg.Unit (fun () -> var := true)
let unset_bool var = Arg.Unit (fun () -> var := false)
let set_int var = Arg.Int(fun i -> var := i)
let set_string var = Arg.String(fun s -> var := s)
let speclist = [
"-name", set_string codelet_name, " set codelet name";
"-standalone", set_bool standalone, " standalone codelet (no desc)";
"-include", set_string inklude, undocumented;
"-verbose", set_bool verbose, " Enable verbose logging messages to stderr";
"-rader-min", set_int rader_min,
"<n> : Use Rader's algorithm for prime sizes >= <n>";
"-threemult", set_bool threemult,
" Use 3-multiply complex multiplications";
"-karatsuba-min", set_int karatsuba_min, undocumented;
"-karatsuba-variant", set_int karatsuba_variant, undocumented;
"-circular-min", set_int circular_min, undocumented;
"-compact", set_bool compact,
" Mangle variable names to reduce size of source code";
"-no-compact", unset_bool compact,
" Disable -compact";
"-dump-dag", set_string dag_dump_file, undocumented;
"-dump-alist", set_string alist_dump_file, undocumented;
"-dump-asched", set_string asched_dump_file, undocumented;
"-lisp-syntax", set_bool lisp_syntax, undocumented;
"-alternate-convolution", set_int alternate_convolution, undocumented;
"-deep-collect-depth", set_int deep_collect_depth, undocumented;
"-schedule-type", set_int schedule_type, undocumented;
"-pipeline-latency", set_int pipeline_latency, undocumented;
"-schedule-for-pipeline", set_bool schedule_for_pipeline, undocumented;
"-dif-split-radix", set_bool dif_split_radix, undocumented;
"-dit-split-radix", unset_bool dif_split_radix, undocumented;
"-generic-arith", set_bool generic_arith, undocumented;
"-no-generic-arith", unset_bool generic_arith, undocumented;
"-precompute-twiddles", set_bool precompute_twiddles, undocumented;
"-no-precompute-twiddles", unset_bool precompute_twiddles, undocumented;
"-inline-single", set_bool inline_single, undocumented;
"-no-inline-single", unset_bool inline_single, undocumented;
"-inline-loads", set_bool inline_loads, undocumented;
"-no-inline-loads", unset_bool inline_loads, undocumented;
"-inline-loads-constants", set_bool inline_loads_constants, undocumented;
"-no-inline-loads-constants",
unset_bool inline_loads_constants, undocumented;
"-inline-constants", set_bool inline_constants, undocumented;
"-no-inline-constants", unset_bool inline_constants, undocumented;
"-trivial-stores", set_bool trivial_stores, undocumented;
"-no-trivial-stores", unset_bool trivial_stores, undocumented;
"-locations-are-special", set_bool locations_are_special, undocumented;
"-no-locations-are-special", unset_bool locations_are_special, undocumented;
"-randomized-cse", set_bool randomized_cse, undocumented;
"-no-randomized-cse", unset_bool randomized_cse, undocumented;
"-network-transposition", set_bool network_transposition, undocumented;
"-no-network-transposition", unset_bool network_transposition, undocumented;
"-reorder-insns", set_bool reorder_insns, undocumented;
"-no-reorder-insns", unset_bool reorder_insns, undocumented;
"-reorder-loads", set_bool reorder_loads, undocumented;
"-no-reorder-loads", unset_bool reorder_loads, undocumented;
"-reorder-stores", set_bool reorder_stores, undocumented;
"-no-reorder-stores", unset_bool reorder_stores, undocumented;
"-newsplit", set_bool newsplit, undocumented;
"-vneg", set_bool vneg, undocumented;
"-fma", set_bool enable_fma, undocumented;
"-no-fma", unset_bool enable_fma, undocumented;
"-variables", set_int number_of_variables, undocumented;
"-strength-reduce-mul", set_bool strength_reduce_mul, undocumented;
"-no-strength-reduce-mul", unset_bool strength_reduce_mul, undocumented;
"-generate-bytw", set_bool generate_bytw, undocumented;
"-no-generate-bytw", unset_bool generate_bytw, undocumented;
]

View File

@@ -0,0 +1,75 @@
(*
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*)
(*************************************************************
* Monads
*************************************************************)
(*
* Phil Wadler has many well written papers about monads. See
* http://cm.bell-labs.com/cm/cs/who/wadler/
*)
(* vanilla state monad *)
module StateMonad = struct
let returnM x = fun s -> (x, s)
let (>>=) = fun m k ->
fun s ->
let (a', s') = m s
in let (a'', s'') = k a' s'
in (a'', s'')
let (>>) = fun m k ->
m >>= fun _ -> k
let rec mapM f = function
[] -> returnM []
| a :: b ->
f a >>= fun a' ->
mapM f b >>= fun b' ->
returnM (a' :: b')
let runM m x initial_state =
let (a, _) = m x initial_state
in a
let fetchState =
fun s -> s, s
let storeState newState =
fun _ -> (), newState
end
(* monad with built-in memoizing capabilities *)
module MemoMonad =
struct
open StateMonad
let memoizing lookupM insertM f k =
lookupM k >>= fun vMaybe ->
match vMaybe with
Some value -> returnM value
| None ->
f k >>= fun value ->
insertM k value >> returnM value
let runM initial_state m x = StateMonad.runM m x initial_state
end

View File

@@ -0,0 +1,164 @@
(*
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*)
(* The generator keeps track of numeric constants in symbolic
expressions using the abstract number type, defined in this file.
Our implementation of the number type uses arbitrary-precision
arithmetic from the built-in Num package in order to maintain an
accurate representation of constants. This allows us to output
constants with many decimal places in the generated C code,
ensuring that we will take advantage of the full precision
available on current and future machines.
Note that we have to write our own routine to compute roots of
unity, since the Num package only supplies simple arithmetic. The
arbitrary-precision operations in Num look like the normal
operations except that they have an appended slash (e.g. +/ -/ */
// etcetera). *)
open Num
type number = N of num
let makeNum n = N n
(* decimal digits of precision to maintain internally, and to print out: *)
let precision = 50
let print_precision = 45
let inveps = (Int 10) **/ (Int precision)
let epsilon = (Int 1) // inveps
let pinveps = (Int 10) **/ (Int print_precision)
let pepsilon = (Int 1) // pinveps
let round x = epsilon */ (round_num (x */ inveps))
let of_int n = N (Int n)
let zero = of_int 0
let one = of_int 1
let two = of_int 2
let mone = of_int (-1)
(* comparison predicate for real numbers *)
let equal (N x) (N y) = (* use both relative and absolute error *)
let absdiff = abs_num (x -/ y) in
absdiff <=/ pepsilon ||
absdiff <=/ pepsilon */ (abs_num x +/ abs_num y)
let is_zero = equal zero
let is_one = equal one
let is_mone = equal mone
let is_two = equal two
(* Note that, in the following computations, it is important to round
to precision epsilon after each operation. Otherwise, since the
Num package uses exact rational arithmetic, the number of digits
quickly blows up. *)
let mul (N a) (N b) = makeNum (round (a */ b))
let div (N a) (N b) = makeNum (round (a // b))
let add (N a) (N b) = makeNum (round (a +/ b))
let sub (N a) (N b) = makeNum (round (a -/ b))
let negative (N a) = (a </ (Int 0))
let negate (N a) = makeNum (minus_num a)
let greater a b = negative (sub b a)
let epsilonsq = epsilon */ epsilon
let epsilonsq2 = (Int 100) */ epsilonsq
let sqr a = a */ a
let almost_equal (N a) (N b) = (sqr (a -/ b)) <=/ epsilonsq2
(* find square root by Newton's method *)
let sqrt a =
let rec sqrt_iter guess =
let newguess = div (add guess (div a guess)) two in
if (almost_equal newguess guess) then newguess
else sqrt_iter newguess
in sqrt_iter (div a two)
let csub (xr, xi) (yr, yi) = (round (xr -/ yr), round (xi -/ yi))
let cdiv (xr, xi) r = (round (xr // r), round (xi // r))
let cmul (xr, xi) (yr, yi) = (round (xr */ yr -/ xi */ yi),
round (xr */ yi +/ xi */ yr))
let csqr (xr, xi) = (round (xr */ xr -/ xi */ xi), round ((Int 2) */ xr */ xi))
let cabssq (xr, xi) = xr */ xr +/ xi */ xi
let cconj (xr, xi) = (xr, minus_num xi)
let cinv x = cdiv (cconj x) (cabssq x)
let almost_equal_cnum (xr, xi) (yr, yi) =
(cabssq (xr -/ yr,xi -/ yi)) <=/ epsilonsq2
(* Put a complex number to an integer power by repeated squaring: *)
let rec ipow_cnum x n =
if (n == 0) then
(Int 1, Int 0)
else if (n < 0) then
cinv (ipow_cnum x (- n))
else if (n mod 2 == 0) then
ipow_cnum (csqr x) (n / 2)
else
cmul x (ipow_cnum x (n - 1))
let twopi = 6.28318530717958647692528676655900576839433879875021164194989
(* Find the nth (complex) primitive root of unity by Newton's method: *)
let primitive_root_of_unity n =
let rec root_iter guess =
let newguess = csub guess (cdiv (csub guess
(ipow_cnum guess (1 - n)))
(Int n)) in
if (almost_equal_cnum guess newguess) then newguess
else root_iter newguess
in let float_to_num f = (Int (truncate (f *. 1.0e9))) // (Int 1000000000)
in root_iter (float_to_num (cos (twopi /. (float n))),
float_to_num (sin (twopi /. (float n))))
let cexp n i =
if ((i mod n) == 0) then
(one,zero)
else
let (n2,i2) = Util.lowest_terms n i
in let (c,s) = ipow_cnum (primitive_root_of_unity n2) i2
in (makeNum c, makeNum s)
let to_konst (N n) =
let f = float_of_num n in
let f' = if f < 0.0 then f *. (-1.0) else f in
let f2 = if (f' >= 1.0) then (f' -. (float (truncate f'))) else f'
in let q = string_of_int (truncate(f2 *. 1.0E9))
in let r = "0000000000" ^ q
in let l = String.length r
in let prefix = if (f < 0.0) then "KN" else "KP" in
if (f' >= 1.0) then
(prefix ^ (string_of_int (truncate f')) ^ "_" ^
(String.sub r (l - 9) 9))
else
(prefix ^ (String.sub r (l - 9) 9))
let to_string (N n) = approx_num_fix print_precision n
let to_float (N n) = float_of_num n

View File

@@ -0,0 +1,49 @@
(*
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*)
type number
val equal : number -> number -> bool
val of_int : int -> number
val zero : number
val one : number
val two : number
val mone : number
val is_zero : number -> bool
val is_one : number -> bool
val is_mone : number -> bool
val is_two : number -> bool
val mul : number -> number -> number
val div : number -> number -> number
val add : number -> number -> number
val sub : number -> number -> number
val negative : number -> bool
val greater : number -> number -> bool
val negate : number -> number
val sqrt : number -> number
(* cexp n i = (cos (2 * pi * i / n), sin (2 * pi * i / n)) *)
val cexp : int -> int -> (number * number)
val to_konst : number -> string
val to_string : number -> string
val to_float : number -> float

View File

@@ -0,0 +1,144 @@
(*
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*)
(*
* the oracle decrees whether the sign of an expression should
* be changed.
*
* Say the expression (A - B) appears somewhere. Elsewhere in the
* expression dag the expression (B - A) may appear.
* The oracle determines which of the two forms is canonical.
*
* Algorithm: evaluate the expression at a random input, and
* keep the expression with the positive sign.
*)
let make_memoizer hash equal =
let table = ref Assoctable.empty
in
(fun f k ->
match Assoctable.lookup hash equal k !table with
Some value -> value
| None ->
let value = f k in
begin
table := Assoctable.insert hash k value !table;
value
end)
let almost_equal x y =
let epsilon = 1.0E-8 in
(abs_float (x -. y) < epsilon) ||
(abs_float (x -. y) < epsilon *. (abs_float x +. abs_float y))
let absid = make_memoizer
(fun x -> Expr.hash_float (abs_float x))
(fun a b -> almost_equal a b || almost_equal (-. a) b)
(fun x -> x)
let make_random_oracle () = make_memoizer
Variable.hash
Variable.same
(fun _ -> (float (Random.bits())) /. 1073741824.0)
let the_random_oracle = make_random_oracle ()
let sum_list l = List.fold_right (+.) l 0.0
let eval_aux random_oracle =
let memoizing = make_memoizer Expr.hash (==) in
let rec eval x =
memoizing
(function
| Expr.Num x -> Number.to_float x
| Expr.NaN x -> Expr.transcendent_to_float x
| Expr.Load v -> random_oracle v
| Expr.Store (v, x) -> eval x
| Expr.Plus l -> sum_list (List.map eval l)
| Expr.Times (a, b) -> (eval a) *. (eval b)
| Expr.CTimes (a, b) ->
1.098612288668109691395245236 +.
1.609437912434100374600759333 *. (eval a) *. (eval b)
| Expr.CTimesJ (a, b) ->
0.9102392266268373936142401657 +.
0.6213349345596118107071993881 *. (eval a) *. (eval b)
| Expr.Uminus x -> -. (eval x))
x
in eval
let eval = eval_aux the_random_oracle
let should_flip_sign node =
let v = eval node in
let v' = absid v in
not (almost_equal v v')
(*
* determine with high probability if two expressions are equal.
*
* The test is randomized: if the two expressions have the
* same value for NTESTS random inputs, then they are proclaimed
* equal. (Note that two distinct linear functions L1(x0, x1, ..., xn)
* and L2(x0, x1, ..., xn) have the same value with probability
* 0 for random x's, and thus this test is way more paranoid than
* necessary.)
*)
let likely_equal a b =
let tolerance = 1.0e-8
and ntests = 20
in
let rec loop n =
if n = 0 then
true
else
let r = make_random_oracle () in
let va = eval_aux r a
and vb = eval_aux r b
in
if (abs_float (va -. vb)) >
tolerance *. (abs_float va +. abs_float vb +. 0.0001)
then
false
else
loop (n - 1)
in
match (a, b) with
(*
* Because of the way eval is constructed, we have
* eval (Store (v, x)) == eval x
* However, we never consider the two expressions equal
*)
| (Expr.Store _, _) -> false
| (_, Expr.Store _) -> false
(*
* Expressions of the form ``Uminus (Store _)''
* are artifacts of algsimp
*)
| ((Expr.Uminus (Expr.Store _)), _) -> false
| (_, Expr.Uminus (Expr.Store _)) -> false
| _ -> loop ntests
let hash x =
let f = eval x in
truncate (f *. 65536.0)

View File

@@ -0,0 +1,24 @@
(*
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*)
val should_flip_sign : Expr.expr -> bool
val likely_equal : Expr.expr -> Expr.expr -> bool
val hash : Expr.expr -> int

View File

@@ -0,0 +1,236 @@
(*
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*)
(* This file contains the instruction scheduler, which finds an
efficient ordering for a given list of instructions.
The scheduler analyzes the DAG (directed acyclic graph) formed by
the instruction dependencies, and recursively partitions it. The
resulting schedule data structure expresses a "good" ordering
and structure for the computation.
The scheduler makes use of utilties in Dag and other packages to
manipulate the Dag and the instruction list. *)
open Dag
(*************************************************
* Dag scheduler
*************************************************)
let to_assignment node = (Expr.Assign (node.assigned, node.expression))
let makedag l = Dag.makedag
(List.map (function Expr.Assign (v, x) -> (v, x)) l)
let return x = x
let has_color c n = (n.color = c)
let set_color c n = (n.color <- c)
let has_either_color c1 c2 n = (n.color = c1 || n.color = c2)
let infinity = 100000
let cc dag inputs =
begin
Dag.for_all dag (fun node ->
node.label <- infinity);
(match inputs with
a :: _ -> bfs dag a 0
| _ -> failwith "connected");
return
((List.map to_assignment (List.filter (fun n -> n.label < infinity)
(Dag.to_list dag))),
(List.map to_assignment (List.filter (fun n -> n.label == infinity)
(Dag.to_list dag))))
end
let rec connected_components alist =
let dag = makedag alist in
let inputs =
List.filter (fun node -> Util.null node.predecessors)
(Dag.to_list dag) in
match cc dag inputs with
(a, []) -> [a]
| (a, b) -> a :: connected_components b
let single_load node =
match (node.input_variables, node.predecessors) with
([x], []) ->
Variable.is_constant x ||
(!Magic.locations_are_special && Variable.is_locative x)
| _ -> false
let loads_locative node =
match (node.input_variables, node.predecessors) with
| ([x], []) -> Variable.is_locative x
| _ -> false
let partition alist =
let dag = makedag alist in
let dag' = Dag.to_list dag in
let inputs =
List.filter (fun node -> Util.null node.predecessors) dag'
and outputs =
List.filter (fun node -> Util.null node.successors) dag'
and special_inputs = List.filter single_load dag' in
begin
let c = match !Magic.schedule_type with
| 1 -> RED; (* all nodes in the input partition *)
| -1 -> BLUE; (* all nodes in the output partition *)
| _ -> BLACK; (* node color determined by bisection algorithm *)
in Dag.for_all dag (fun node -> node.color <- c);
Util.for_list inputs (set_color RED);
(*
The special inputs are those input nodes that load a single
location or twiddle factor. Special inputs can end up either
in the blue or in the red part. These inputs are special
because they inherit a color from their neighbors: If a red
node needs a special input, the special input becomes red, but
if all successors of a special input are blue, the special
input becomes blue. Outputs are always blue, whether they be
special or not.
Because of the processing of special inputs, however, the final
partition might end up being composed only of blue nodes (which
is incorrect). In this case we manually reset all inputs
(whether special or not) to be red.
*)
Util.for_list special_inputs (set_color YELLOW);
Util.for_list outputs (set_color BLUE);
let rec loopi donep =
match (List.filter
(fun node -> (has_color BLACK node) &&
List.for_all (has_either_color RED YELLOW) node.predecessors)
dag') with
[] -> if (donep) then () else loopo true
| i ->
begin
Util.for_list i (fun node ->
begin
set_color RED node;
Util.for_list node.predecessors (set_color RED);
end);
loopo false;
end
and loopo donep =
match (List.filter
(fun node -> (has_either_color BLACK YELLOW node) &&
List.for_all (has_color BLUE) node.successors)
dag') with
[] -> if (donep) then () else loopi true
| o ->
begin
Util.for_list o (set_color BLUE);
loopi false;
end
in loopi false;
(* fix the partition if it is incorrect *)
if not (List.exists (has_color RED) dag') then
Util.for_list inputs (set_color RED);
return
((List.map to_assignment (List.filter (has_color RED) dag')),
(List.map to_assignment (List.filter (has_color BLUE) dag')))
end
type schedule =
Done
| Instr of Expr.assignment
| Seq of (schedule * schedule)
| Par of schedule list
(* produce a sequential schedule determined by the user *)
let rec sequentially = function
[] -> Done
| a :: b -> Seq (Instr a, sequentially b)
let schedule =
let rec schedule_alist = function
| [] -> Done
| [a] -> Instr a
| alist -> match connected_components alist with
| ([a]) -> schedule_connected a
| l -> Par (List.map schedule_alist l)
and schedule_connected alist =
match partition alist with
| (a, b) -> Seq (schedule_alist a, schedule_alist b)
in fun x ->
let () = Util.info "begin schedule" in
let res = schedule_alist x in
let () = Util.info "end schedule" in
res
(* partition a dag into two parts:
1) the set of loads from locatives and their successors,
2) all other nodes
This step separates the ``body'' of the dag, which computes the
actual fft, from the ``precomputations'' part, which computes e.g.
twiddle factors.
*)
let partition_precomputations alist =
let dag = makedag alist in
let dag' = Dag.to_list dag in
let loads = List.filter loads_locative dag' in
begin
Dag.for_all dag (set_color BLUE);
Util.for_list loads (set_color RED);
let rec loop () =
match (List.filter
(fun node -> (has_color RED node) &&
List.exists (has_color BLUE) node.successors)
dag') with
[] -> ()
| i ->
begin
Util.for_list i
(fun node ->
Util.for_list node.successors (set_color RED));
loop ()
end
in loop ();
return
((List.map to_assignment (List.filter (has_color BLUE) dag')),
(List.map to_assignment (List.filter (has_color RED) dag')))
end
let isolate_precomputations_and_schedule alist =
let (a, b) = partition_precomputations alist in
Seq (schedule a, schedule b)

View File

@@ -0,0 +1,30 @@
(*
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*)
type schedule =
| Done
| Instr of Expr.assignment
| Seq of (schedule * schedule)
| Par of schedule list
val schedule : Expr.assignment list -> schedule
val sequentially : Expr.assignment list -> schedule
val isolate_precomputations_and_schedule : Expr.assignment list -> schedule

215
fftw-3.3.10/genfft/simd.ml Normal file
View File

@@ -0,0 +1,215 @@
(*
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*)
open Expr
open List
open Printf
open Variable
open Annotate
open Simdmagic
open C
let realtype = "V"
let realtypep = realtype ^ " *"
let constrealtype = "const " ^ realtype
let constrealtypep = constrealtype ^ " *"
let alignment_mod = 2
(*
* SIMD C AST unparser
*)
let foldr_string_concat l = fold_right (^) l ""
let rec unparse_by_twiddle nam tw src =
sprintf "%s(&(%s),%s)" nam (Variable.unparse tw) (unparse_expr src)
and unparse_store dst = function
| Times (NaN MULTI_A, x) ->
sprintf "STM%d(&(%s),%s,%s,&(%s));\n"
!Simdmagic.store_multiple
(Variable.unparse dst) (unparse_expr x)
(Variable.vstride_of_locative dst)
(Variable.unparse_for_alignment alignment_mod dst)
| Times (NaN MULTI_B, Plus stuff) ->
sprintf "STN%d(&(%s)%s,%s);\n"
!Simdmagic.store_multiple
(Variable.unparse dst)
(List.fold_right (fun x a -> "," ^ (unparse_expr x) ^ a) stuff "")
(Variable.vstride_of_locative dst)
| src_expr ->
sprintf "ST(&(%s),%s,%s,&(%s));\n"
(Variable.unparse dst) (unparse_expr src_expr)
(Variable.vstride_of_locative dst)
(Variable.unparse_for_alignment alignment_mod dst)
and unparse_expr =
let rec unparse_plus = function
| [a] -> unparse_expr a
| (Uminus (Times (NaN I, b))) :: c :: d -> op2 "VFNMSI" [b] (c :: d)
| c :: (Uminus (Times (NaN I, b))) :: d -> op2 "VFNMSI" [b] (c :: d)
| (Uminus (Times (NaN CONJ, b))) :: c :: d -> op2 "VFNMSCONJ" [b] (c :: d)
| c :: (Uminus (Times (NaN CONJ, b))) :: d -> op2 "VFNMSCONJ" [b] (c :: d)
| (Times (NaN I, b)) :: c :: d -> op2 "VFMAI" [b] (c :: d)
| c :: (Times (NaN I, b)) :: d -> op2 "VFMAI" [b] (c :: d)
| (Times (NaN CONJ, b)) :: (Uminus c) :: d -> op2 "VFMSCONJ" [b] (c :: d)
| (Uminus c) :: (Times (NaN CONJ, b)) :: d -> op2 "VFMSCONJ" [b] (c :: d)
| (Times (NaN CONJ, b)) :: c :: d -> op2 "VFMACONJ" [b] (c :: d)
| c :: (Times (NaN CONJ, b)) :: d -> op2 "VFMACONJ" [b] (c :: d)
| (Times (NaN _, b)) :: (Uminus c) :: d -> failwith "VFMS NaN"
| (Uminus c) :: (Times (NaN _, b)) :: d -> failwith "VFMS NaN"
| (Uminus (Times (a, b))) :: c :: d -> op3 "VFNMS" a b (c :: d)
| c :: (Uminus (Times (a, b))) :: d -> op3 "VFNMS" a b (c :: d)
| (Times (a, b)) :: (Uminus c) :: d -> op3 "VFMS" a b (c :: negate d)
| (Uminus c) :: (Times (a, b)) :: d -> op3 "VFMS" a b (c :: negate d)
| (Times (a, b)) :: c :: d -> op3 "VFMA" a b (c :: d)
| c :: (Times (a, b)) :: d -> op3 "VFMA" a b (c :: d)
| (Uminus a :: b) -> op2 "VSUB" b [a]
| (b :: Uminus a :: c) -> op2 "VSUB" (b :: c) [a]
| (a :: b) -> op2 "VADD" [a] b
| [] -> failwith "unparse_plus"
and op3 nam a b c =
nam ^ "(" ^ (unparse_expr a) ^ ", " ^ (unparse_expr b) ^ ", " ^
(unparse_plus c) ^ ")"
and op2 nam a b =
nam ^ "(" ^ (unparse_plus a) ^ ", " ^ (unparse_plus b) ^ ")"
and op1 nam a =
nam ^ "(" ^ (unparse_expr a) ^ ")"
and negate = function
| [] -> []
| (Uminus x) :: y -> x :: negate y
| x :: y -> (Uminus x) :: negate y
in function
| CTimes(Load tw, src)
when Variable.is_constant tw && !Magic.generate_bytw ->
unparse_by_twiddle "BYTW" tw src
| CTimesJ(Load tw, src)
when Variable.is_constant tw && !Magic.generate_bytw ->
unparse_by_twiddle "BYTWJ" tw src
| Load v when is_locative(v) ->
sprintf "LD(&(%s), %s, &(%s))" (Variable.unparse v)
(Variable.vstride_of_locative v)
(Variable.unparse_for_alignment alignment_mod v)
| Load v when is_constant(v) -> sprintf "LDW(&(%s))" (Variable.unparse v)
| Load v -> Variable.unparse v
| Num n -> sprintf "LDK(%s)" (Number.to_konst n)
| NaN n -> failwith "NaN in unparse_expr"
| Plus [] -> "0.0 /* bug */"
| Plus [a] -> " /* bug */ " ^ (unparse_expr a)
| Plus a -> unparse_plus a
| Times(NaN I,b) -> op1 "VBYI" b
| Times(NaN CONJ,b) -> op1 "VCONJ" b
| Times(a,b) ->
sprintf "VMUL(%s, %s)" (unparse_expr a) (unparse_expr b)
| CTimes(a,Times(NaN I, b)) ->
sprintf "VZMULI(%s, %s)" (unparse_expr a) (unparse_expr b)
| CTimes(a,b) ->
sprintf "VZMUL(%s, %s)" (unparse_expr a) (unparse_expr b)
| CTimesJ(a,Times(NaN I, b)) ->
sprintf "VZMULIJ(%s, %s)" (unparse_expr a) (unparse_expr b)
| CTimesJ(a,b) ->
sprintf "VZMULJ(%s, %s)" (unparse_expr a) (unparse_expr b)
| Uminus a when !Magic.vneg -> op1 "VNEG" a
| Uminus a -> failwith "SIMD Uminus"
| _ -> failwith "unparse_expr"
and unparse_decl x = C.unparse_decl x
and unparse_ast ast =
let rec unparse_assignment = function
| Assign (v, x) when Variable.is_locative v ->
unparse_store v x
| Assign (v, x) ->
(Variable.unparse v) ^ " = " ^ (unparse_expr x) ^ ";\n"
and unparse_annotated force_bracket =
let rec unparse_code = function
| ADone -> ""
| AInstr i -> unparse_assignment i
| ASeq (a, b) ->
(unparse_annotated false a) ^ (unparse_annotated false b)
and declare_variables l =
let rec uvar = function
[] -> failwith "uvar"
| [v] -> (Variable.unparse v) ^ ";\n"
| a :: b -> (Variable.unparse a) ^ ", " ^ (uvar b)
in let rec vvar l =
let s = if !Magic.compact then 15 else 1 in
if (List.length l <= s) then
match l with
[] -> ""
| _ -> realtype ^ " " ^ (uvar l)
else
(vvar (Util.take s l)) ^ (vvar (Util.drop s l))
in vvar (List.filter Variable.is_temporary l)
in function
Annotate (_, _, decl, _, code) ->
if (not force_bracket) && (Util.null decl) then
unparse_code code
else "{\n" ^
(declare_variables decl) ^
(unparse_code code) ^
"}\n"
in match ast with
| Asch a -> (unparse_annotated true a)
| Return x -> "return " ^ unparse_ast x ^ ";"
| Simd_leavefun -> "VLEAVE();"
| For (a, b, c, d) ->
"for (" ^
unparse_ast a ^ "; " ^ unparse_ast b ^ "; " ^ unparse_ast c
^ ")" ^ unparse_ast d
| If (a, d) ->
"if (" ^
unparse_ast a
^ ")" ^ unparse_ast d
| Block (d, s) ->
if (s == []) then ""
else
"{\n" ^
foldr_string_concat (map unparse_decl d) ^
foldr_string_concat (map unparse_ast s) ^
"}\n"
| x -> C.unparse_ast x
and unparse_function = function
Fcn (typ, name, args, body) ->
let rec unparse_args = function
[Decl (a, b)] -> a ^ " " ^ b
| (Decl (a, b)) :: s -> a ^ " " ^ b ^ ", "
^ unparse_args s
| [] -> ""
| _ -> failwith "unparse_function"
in
(typ ^ " " ^ name ^ "(" ^ unparse_args args ^ ")\n" ^
unparse_ast body)
let extract_constants f =
let constlist = flatten (map expr_to_constants (C.ast_to_expr_list f))
in map
(fun n ->
Tdecl
("DVK(" ^ (Number.to_konst n) ^ ", " ^ (Number.to_string n) ^
");\n"))
(unique_constants constlist)

View File

@@ -0,0 +1,28 @@
(*
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*)
val unparse_function : C.c_fcn -> string
val extract_constants : C.c_ast -> C.c_decl list
val realtype : string
val realtypep : string
val constrealtype : string
val constrealtypep : string

View File

@@ -0,0 +1,31 @@
(*
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*)
(* SIMD magic parameters *)
let simd_mode = ref false
let store_multiple = ref 1
open Magic
let speclist = [
"-simd", set_bool simd_mode, undocumented;
"-store-multiple", set_int store_multiple, undocumented;
]

View File

@@ -0,0 +1,288 @@
(*
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*)
(*************************************************************
* Conversion of the dag to an assignment list
*************************************************************)
(*
* This function is messy. The main problem is that we want to
* inline dag nodes conditionally, depending on how many times they
* are used. The Right Thing to do would be to modify the
* state monad to propagate some of the state backwards, so that
* we know whether a given node will be used again in the future.
* This modification is trivial in a lazy language, but it is
* messy in a strict language like ML.
*
* In this implementation, we just do the obvious thing, i.e., visit
* the dag twice, the first to count the node usages, and the second to
* produce the output.
*)
open Monads.StateMonad
open Monads.MemoMonad
open Expr
let fresh = Variable.make_temporary
let node_insert x = Assoctable.insert Expr.hash x
let node_lookup x = Assoctable.lookup Expr.hash (==) x
let empty = Assoctable.empty
let fetchAl =
fetchState >>= (fun (al, _, _) -> returnM al)
let storeAl al =
fetchState >>= (fun (_, visited, visited') ->
storeState (al, visited, visited'))
let fetchVisited = fetchState >>= (fun (_, v, _) -> returnM v)
let storeVisited visited =
fetchState >>= (fun (al, _, visited') ->
storeState (al, visited, visited'))
let fetchVisited' = fetchState >>= (fun (_, _, v') -> returnM v')
let storeVisited' visited' =
fetchState >>= (fun (al, visited, _) ->
storeState (al, visited, visited'))
let lookupVisitedM' key =
fetchVisited' >>= fun table ->
returnM (node_lookup key table)
let insertVisitedM' key value =
fetchVisited' >>= fun table ->
storeVisited' (node_insert key value table)
let counting f x =
fetchVisited >>= (fun v ->
match node_lookup x v with
Some count ->
let incr_cnt =
fetchVisited >>= (fun v' ->
storeVisited (node_insert x (count + 1) v'))
in
begin
match x with
(* Uminus is always inlined. Visit child *)
Uminus y -> f y >> incr_cnt
| _ -> incr_cnt
end
| None ->
f x >> fetchVisited >>= (fun v' ->
storeVisited (node_insert x 1 v')))
let with_varM v x =
fetchAl >>= (fun al -> storeAl ((v, x) :: al)) >> returnM (Load v)
let inlineM = returnM
let with_tempM x = match x with
| Load v when Variable.is_temporary v -> inlineM x (* avoid trivial moves *)
| _ -> with_varM (fresh ()) x
(* declare a temporary only if node is used more than once *)
let with_temp_maybeM node x =
fetchVisited >>= (fun v ->
match node_lookup node v with
Some count ->
if (count = 1 && !Magic.inline_single) then
inlineM x
else
with_tempM x
| None ->
failwith "with_temp_maybeM")
type fma =
NO_FMA
| FMA of expr * expr * expr (* FMA (a, b, c) => a + b * c *)
| FMS of expr * expr * expr (* FMS (a, b, c) => -a + b * c *)
| FNMS of expr * expr * expr (* FNMS (a, b, c) => a - b * c *)
let good_for_fma (a, b) =
let good = function
| NaN I -> true
| NaN CONJ -> true
| NaN _ -> false
| Times(NaN _, _) -> false
| Times(_, NaN _) -> false
| _ -> true
in good a && good b
let build_fma l =
if (not !Magic.enable_fma) then NO_FMA
else match l with
| [a; Uminus (Times (b, c))] when good_for_fma (b, c) -> FNMS (a, b, c)
| [Uminus (Times (b, c)); a] when good_for_fma (b, c) -> FNMS (a, b, c)
| [Uminus a; Times (b, c)] when good_for_fma (b, c) -> FMS (a, b, c)
| [Times (b, c); Uminus a] when good_for_fma (b, c) -> FMS (a, b, c)
| [a; Times (b, c)] when good_for_fma (b, c) -> FMA (a, b, c)
| [Times (b, c); a] when good_for_fma (b, c) -> FMA (a, b, c)
| _ -> NO_FMA
let children_fma l = match build_fma l with
| FMA (a, b, c) -> Some (a, b, c)
| FMS (a, b, c) -> Some (a, b, c)
| FNMS (a, b, c) -> Some (a, b, c)
| NO_FMA -> None
let rec visitM x =
counting (function
| Load v -> returnM ()
| Num a -> returnM ()
| NaN a -> returnM ()
| Store (v, x) -> visitM x
| Plus a -> (match children_fma a with
None -> mapM visitM a >> returnM ()
| Some (a, b, c) ->
(* visit fma's arguments twice to make sure they are not inlined *)
visitM a >> visitM a >>
visitM b >> visitM b >>
visitM c >> visitM c)
| Times (a, b) -> visitM a >> visitM b
| CTimes (a, b) -> visitM a >> visitM b
| CTimesJ (a, b) -> visitM a >> visitM b
| Uminus a -> visitM a)
x
let visit_rootsM = mapM visitM
let rec expr_of_nodeM x =
memoizing lookupVisitedM' insertVisitedM'
(function x -> match x with
| Load v ->
if (Variable.is_temporary v) then
inlineM (Load v)
else if (Variable.is_locative v && !Magic.inline_loads) then
inlineM (Load v)
else if (Variable.is_constant v && !Magic.inline_loads_constants) then
inlineM (Load v)
else
with_tempM (Load v)
| Num a ->
if !Magic.inline_constants then
inlineM (Num a)
else
with_temp_maybeM x (Num a)
| NaN a -> inlineM (NaN a)
| Store (v, x) ->
expr_of_nodeM x >>=
(if !Magic.trivial_stores then with_tempM else inlineM) >>=
with_varM v
| Plus a ->
begin
match build_fma a with
FMA (a, b, c) ->
expr_of_nodeM a >>= fun a' ->
expr_of_nodeM b >>= fun b' ->
expr_of_nodeM c >>= fun c' ->
with_temp_maybeM x (Plus [a'; Times (b', c')])
| FMS (a, b, c) ->
expr_of_nodeM a >>= fun a' ->
expr_of_nodeM b >>= fun b' ->
expr_of_nodeM c >>= fun c' ->
with_temp_maybeM x
(Plus [Times (b', c'); Uminus a'])
| FNMS (a, b, c) ->
expr_of_nodeM a >>= fun a' ->
expr_of_nodeM b >>= fun b' ->
expr_of_nodeM c >>= fun c' ->
with_temp_maybeM x
(Plus [a'; Uminus (Times (b', c'))])
| NO_FMA ->
mapM expr_of_nodeM a >>= fun a' ->
with_temp_maybeM x (Plus a')
end
| CTimes (Load _ as a, b) when !Magic.generate_bytw ->
expr_of_nodeM b >>= fun b' ->
with_tempM (CTimes (a, b'))
| CTimes (a, b) ->
expr_of_nodeM a >>= fun a' ->
expr_of_nodeM b >>= fun b' ->
with_tempM (CTimes (a', b'))
| CTimesJ (Load _ as a, b) when !Magic.generate_bytw ->
expr_of_nodeM b >>= fun b' ->
with_tempM (CTimesJ (a, b'))
| CTimesJ (a, b) ->
expr_of_nodeM a >>= fun a' ->
expr_of_nodeM b >>= fun b' ->
with_tempM (CTimesJ (a', b'))
| Times (a, b) ->
expr_of_nodeM a >>= fun a' ->
expr_of_nodeM b >>= fun b' ->
begin
match a' with
Num a'' when !Magic.strength_reduce_mul && Number.is_two a'' ->
(inlineM b' >>= fun b'' ->
with_temp_maybeM x (Plus [b''; b'']))
| _ -> with_temp_maybeM x (Times (a', b'))
end
| Uminus a ->
expr_of_nodeM a >>= fun a' ->
inlineM (Uminus a'))
x
let expr_of_rootsM = mapM expr_of_nodeM
let peek_alistM roots =
visit_rootsM roots >> expr_of_rootsM roots >> fetchAl
let wrap_assign (a, b) = Expr.Assign (a, b)
let to_assignments dag =
let () = Util.info "begin to_alist" in
let al = List.rev (runM ([], empty, empty) peek_alistM dag) in
let res = List.map wrap_assign al in
let () = Util.info "end to_alist" in
res
(* dump alist in `dot' format *)
let dump print alist =
let vs v = "\"" ^ (Variable.unparse v) ^ "\"" in
begin
print "digraph G {\n";
print "\tsize=\"6,6\";\n";
(* all input nodes have the same rank *)
print "{ rank = same;\n";
List.iter (fun (Expr.Assign (v, x)) ->
List.iter (fun y ->
if (Variable.is_locative y) then print("\t" ^ (vs y) ^ ";\n"))
(Expr.find_vars x))
alist;
print "}\n";
(* all output nodes have the same rank *)
print "{ rank = same;\n";
List.iter (fun (Expr.Assign (v, x)) ->
if (Variable.is_locative v) then print("\t" ^ (vs v) ^ ";\n"))
alist;
print "}\n";
(* edges *)
List.iter (fun (Expr.Assign (v, x)) ->
List.iter (fun y -> print("\t" ^ (vs y) ^ " -> " ^ (vs v) ^ ";\n"))
(Expr.find_vars x))
alist;
print "}\n";
end

View File

@@ -0,0 +1,24 @@
(*
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*)
val to_assignments : Expr.expr list -> Expr.assignment list
val dump : (string -> unit) -> Expr.assignment list -> unit
val good_for_fma : Expr.expr * Expr.expr -> bool

152
fftw-3.3.10/genfft/trig.ml Normal file
View File

@@ -0,0 +1,152 @@
(*
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*)
(* trigonometric transforms *)
open Util
(* DFT of real input *)
let rdft sign n input =
Fft.dft sign n (Complex.real @@ input)
(* DFT of hermitian input *)
let hdft sign n input =
Fft.dft sign n (Complex.hermitian n input)
(* DFT real transform of vectors of two real numbers,
multiplication by (NaN I), and summation *)
let dft_via_rdft sign n input =
let f = rdft sign n input
in fun i ->
Complex.plus
[Complex.real (f i);
Complex.times (Complex.nan Expr.I) (Complex.imag (f i))]
(* Discrete Hartley Transform *)
let dht sign n input =
let f = Fft.dft sign n (Complex.real @@ input) in
(fun i ->
Complex.plus [Complex.real (f i); Complex.imag (f i)])
let trigI n input =
let twon = 2 * n in
let input' = Complex.hermitian twon input
in
Fft.dft 1 twon input'
let interleave_zero input = fun i ->
if (i mod 2) == 0
then Complex.zero
else
input ((i - 1) / 2)
let trigII n input =
let fourn = 4 * n in
let input' = Complex.hermitian fourn (interleave_zero input)
in
Fft.dft 1 fourn input'
let trigIII n input =
let fourn = 4 * n in
let twon = 2 * n in
let input' = Complex.hermitian fourn
(fun i ->
if (i == 0) then
Complex.real (input 0)
else if (i == twon) then
Complex.uminus (Complex.real (input 0))
else
Complex.antihermitian twon input i)
in
let dft = Fft.dft 1 fourn input'
in fun k -> dft (2 * k + 1)
let zero_extend n input = fun i ->
if (i >= 0 && i < n)
then input i
else Complex.zero
let trigIV n input =
let fourn = 4 * n
and eightn = 8 * n in
let input' = Complex.hermitian eightn
(zero_extend fourn (Complex.antihermitian fourn
(interleave_zero input)))
in
let dft = Fft.dft 1 eightn input'
in fun k -> dft (2 * k + 1)
let make_dct scale nshift trig =
fun n input ->
trig (n - nshift) (Complex.real @@ (Complex.times scale) @@
(zero_extend n input))
(*
* DCT-I: y[k] = sum x[j] cos(pi * j * k / n)
*)
let dctI = make_dct Complex.one 1 trigI
(*
* DCT-II: y[k] = sum x[j] cos(pi * (j + 1/2) * k / n)
*)
let dctII = make_dct Complex.one 0 trigII
(*
* DCT-III: y[k] = sum x[j] cos(pi * j * (k + 1/2) / n)
*)
let dctIII = make_dct Complex.half 0 trigIII
(*
* DCT-IV y[k] = sum x[j] cos(pi * (j + 1/2) * (k + 1/2) / n)
*)
let dctIV = make_dct Complex.half 0 trigIV
let shift s input = fun i -> input (i - s)
(* DST-x input := TRIG-x (input / i) *)
let make_dst scale nshift kshift jshift trig =
fun n input ->
Complex.real @@
(shift (- jshift)
(trig (n + nshift) (Complex.uminus @@
(Complex.times Complex.i) @@
(Complex.times scale) @@
Complex.real @@
(shift kshift (zero_extend n input)))))
(*
* DST-I: y[k] = sum x[j] sin(pi * j * k / n)
*)
let dstI = make_dst Complex.one 1 1 1 trigI
(*
* DST-II: y[k] = sum x[j] sin(pi * (j + 1/2) * k / n)
*)
let dstII = make_dst Complex.one 0 0 1 trigII
(*
* DST-III: y[k] = sum x[j] sin(pi * j * (k + 1/2) / n)
*)
let dstIII = make_dst Complex.half 0 1 0 trigIII
(*
* DST-IV y[k] = sum x[j] sin(pi * (j + 1/2) * (k + 1/2) / n)
*)
let dstIV = make_dst Complex.half 0 0 0 trigIV

View File

@@ -0,0 +1,35 @@
(*
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*)
val rdft : int -> int -> Complex.signal -> Complex.signal
val hdft : int -> int -> Complex.signal -> Complex.signal
val dft_via_rdft : int -> int -> Complex.signal -> Complex.signal
val dht : int -> int -> Complex.signal -> Complex.signal
val dctI : int -> Complex.signal -> Complex.signal
val dctII : int -> Complex.signal -> Complex.signal
val dctIII : int -> Complex.signal -> Complex.signal
val dctIV : int -> Complex.signal -> Complex.signal
val dstI : int -> Complex.signal -> Complex.signal
val dstII : int -> Complex.signal -> Complex.signal
val dstIII : int -> Complex.signal -> Complex.signal
val dstIV : int -> Complex.signal -> Complex.signal

View File

@@ -0,0 +1,188 @@
(*
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*)
(* policies for loading/computing twiddle factors *)
open Complex
open Util
type twop = TW_FULL | TW_CEXP | TW_NEXT
let optostring = function
| TW_CEXP -> "TW_CEXP"
| TW_NEXT -> "TW_NEXT"
| TW_FULL -> "TW_FULL"
type twinstr = (twop * int * int)
let rec unroll_twfull l = match l with
| [] -> []
| (TW_FULL, v, n) :: b ->
(forall [] cons 1 n (fun i -> (TW_CEXP, v, i)))
@ unroll_twfull b
| a :: b -> a :: unroll_twfull b
let twinstr_to_c_string l =
let one (op, a, b) = Printf.sprintf "{ %s, %d, %d }" (optostring op) a b
in let rec loop first = function
| [] -> ""
| a :: b -> (if first then "\n" else ",\n") ^ (one a) ^ (loop false b)
in "{" ^ (loop true l) ^ "}"
let twinstr_to_simd_string vl l =
let one sep = function
| (TW_NEXT, 1, 0) -> sep ^ "{TW_NEXT, " ^ vl ^ ", 0}"
| (TW_NEXT, _, _) -> failwith "twinstr_to_simd_string"
| (TW_CEXP, v, b) -> sep ^ (Printf.sprintf "VTW(%d,%d)" v b)
| _ -> failwith "twinstr_to_simd_string"
in let rec loop first = function
| [] -> ""
| a :: b -> (one (if first then "\n" else ",\n") a) ^ (loop false b)
in "{" ^ (loop true (unroll_twfull l)) ^ "}"
let rec pow m n =
if (n = 0) then 1
else m * pow m (n - 1)
let rec is_pow m n =
n = 1 || ((n mod m) = 0 && is_pow m (n / m))
let rec log m n = if n = 1 then 0 else 1 + log m (n / m)
let rec largest_power_smaller_than m i =
if (is_pow m i) then i
else largest_power_smaller_than m (i - 1)
let rec smallest_power_larger_than m i =
if (is_pow m i) then i
else smallest_power_larger_than m (i + 1)
let rec_array n f =
let g = ref (fun i -> Complex.zero) in
let a = Array.init n (fun i -> lazy (!g i)) in
let h i = f (fun i -> Lazy.force a.(i)) i in
begin
g := h;
h
end
let ctimes use_complex_arith a b =
if use_complex_arith then
Complex.ctimes a b
else
Complex.times a b
let ctimesj use_complex_arith a b =
if use_complex_arith then
Complex.ctimesj a b
else
Complex.times (Complex.conj a) b
let make_bytwiddle sign use_complex_arith g f i =
if i = 0 then
f i
else if sign = 1 then
ctimes use_complex_arith (g i) (f i)
else
ctimesj use_complex_arith (g i) (f i)
(* various policies for computing/loading twiddle factors *)
let twiddle_policy_load_all v use_complex_arith =
let bytwiddle n sign w f =
make_bytwiddle sign use_complex_arith (fun i -> w (i - 1)) f
and twidlen n = 2 * (n - 1)
and twdesc r = [(TW_FULL, v, r);(TW_NEXT, 1, 0)]
in bytwiddle, twidlen, twdesc
(*
* if i is a power of two, then load w (log i)
* else let x = largest power of 2 less than i in
* let y = i - x in
* compute w^{x+y} = w^x * w^y
*)
let twiddle_policy_log2 v use_complex_arith =
let bytwiddle n sign w f =
let g = rec_array n (fun self i ->
if i = 0 then Complex.one
else if is_pow 2 i then w (log 2 i)
else let x = largest_power_smaller_than 2 i in
let y = i - x in
ctimes use_complex_arith (self x) (self y))
in make_bytwiddle sign use_complex_arith g f
and twidlen n = 2 * (log 2 (largest_power_smaller_than 2 (2 * n - 1)))
and twdesc n =
(List.flatten
(List.map
(fun i ->
if i > 0 && is_pow 2 i then
[TW_CEXP, v, i]
else
[])
(iota n)))
@ [(TW_NEXT, 1, 0)]
in bytwiddle, twidlen, twdesc
let twiddle_policy_log3 v use_complex_arith =
let rec terms_needed i pi s n =
if (s >= n - 1) then i
else terms_needed (i + 1) (3 * pi) (s + pi) n
in
let rec bytwiddle n sign w f =
let nterms = terms_needed 0 1 0 n in
let maxterm = pow 3 (nterms - 1) in
let g = rec_array (3 * n) (fun self i ->
if i = 0 then Complex.one
else if is_pow 3 i then w (log 3 i)
else if i = (n - 1) && maxterm >= n then
w (nterms - 1)
else let x = smallest_power_larger_than 3 i in
if (i + i >= x) then
let x = min x (n - 1) in
ctimesj use_complex_arith (self (x - i)) (self x)
else let x = largest_power_smaller_than 3 i in
ctimes use_complex_arith (self (i - x)) (self x))
in make_bytwiddle sign use_complex_arith g f
and twidlen n = 2 * (terms_needed 0 1 0 n)
and twdesc n =
(List.map
(fun i ->
let x = min (pow 3 i) (n - 1) in
TW_CEXP, v, x)
(iota ((twidlen n) / 2)))
@ [(TW_NEXT, 1, 0)]
in bytwiddle, twidlen, twdesc
let current_twiddle_policy = ref twiddle_policy_load_all
let twiddle_policy use_complex_arith =
!current_twiddle_policy use_complex_arith
let set_policy x = Arg.Unit (fun () -> current_twiddle_policy := x)
let set_policy_int x = Arg.Int (fun i -> current_twiddle_policy := x i)
let undocumented = " Undocumented twiddle policy"
let speclist = [
"-twiddle-load-all", set_policy twiddle_policy_load_all, undocumented;
"-twiddle-log2", set_policy twiddle_policy_log2, undocumented;
"-twiddle-log3", set_policy twiddle_policy_log3, undocumented;
]

View File

@@ -0,0 +1,32 @@
(*
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*)
val speclist : (string * Arg.spec * string) list
type twinstr
val twiddle_policy :
int -> bool ->
(int -> int -> (int -> Complex.expr) -> (int -> Complex.expr) ->
int -> Complex.expr) *(int -> int) * (int -> twinstr list)
val twinstr_to_c_string : twinstr list -> string
val twinstr_to_simd_string : string -> twinstr list -> string

View File

@@ -0,0 +1,35 @@
(*
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*)
(* repository of unique tokens *)
type unique = Unique of int
let make =
let id = ref 0 in
fun () -> begin
id := !id + 1;
Unique !id
end
let same (Unique a) (Unique b) =
a = b

View File

@@ -0,0 +1,24 @@
(*
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*)
type unique
val make : unit -> unique
val same : unique -> unique -> bool

176
fftw-3.3.10/genfft/util.ml Normal file
View File

@@ -0,0 +1,176 @@
(*
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*)
(* various utility functions *)
open List
open Unix
(*****************************************
* Integer operations
*****************************************)
(* fint the inverse of n modulo m *)
let invmod n m =
let rec loop i =
if ((i * n) mod m == 1) then i
else loop (i + 1)
in
loop 1
(* Yooklid's algorithm *)
let rec gcd n m =
if (n > m)
then gcd m n
else
let r = m mod n
in
if (r == 0) then n
else gcd r n
(* reduce the fraction m/n to lowest terms, modulo factors of n/n *)
let lowest_terms n m =
if (m mod n == 0) then
(1,0)
else
let nn = (abs n) in let mm = m * (n / nn)
in let mpos =
if (mm > 0) then (mm mod nn)
else (mm + (1 + (abs mm) / nn) * nn) mod nn
and d = gcd nn (abs mm)
in (nn / d, mpos / d)
(* find a generator for the multiplicative group mod p
(where p must be prime for a generator to exist!!) *)
exception No_Generator
let find_generator p =
let rec period x prod =
if (prod == 1) then 1
else 1 + (period x (prod * x mod p))
in let rec findgen x =
if (x == 0) then raise No_Generator
else if ((period x x) == (p - 1)) then x
else findgen ((x + 1) mod p)
in findgen 1
(* raise x to a power n modulo p (requires n > 0) (in principle,
negative powers would be fine, provided that x and p are relatively
prime...we don't need this functionality, though) *)
exception Negative_Power
let rec pow_mod x n p =
if (n == 0) then 1
else if (n < 0) then raise Negative_Power
else if (n mod 2 == 0) then pow_mod (x * x mod p) (n / 2) p
else x * (pow_mod x (n - 1) p) mod p
(******************************************
* auxiliary functions
******************************************)
let rec forall id combiner a b f =
if (a >= b) then id
else combiner (f a) (forall id combiner (a + 1) b f)
let sum_list l = fold_right (+) l 0
let max_list l = fold_right (max) l (-999999)
let min_list l = fold_right (min) l 999999
let count pred = fold_left
(fun a elem -> if (pred elem) then 1 + a else a) 0
let remove elem = List.filter (fun e -> (e != elem))
let cons a b = a :: b
let null = function
[] -> true
| _ -> false
let for_list l f = List.iter f l
let rmap l f = List.map f l
(* functional composition *)
let (@@) f g x = f (g x)
let forall_flat a b = forall [] (@) a b
let identity x = x
let rec minimize f = function
[] -> None
| elem :: rest ->
match minimize f rest with
None -> Some elem
| Some x -> if (f x) >= (f elem) then Some elem else Some x
let rec find_elem condition = function
[] -> None
| elem :: rest ->
if condition elem then
Some elem
else
find_elem condition rest
(* find x, x >= a, such that (p x) is true *)
let rec suchthat a pred =
if (pred a) then a else suchthat (a + 1) pred
(* print an information message *)
let info string =
if !Magic.verbose then begin
let now = Unix.times ()
and pid = Unix.getpid () in
prerr_string ((string_of_int pid) ^ ": " ^
"at t = " ^ (string_of_float now.tms_utime) ^ " : ");
prerr_string (string ^ "\n");
flush Pervasives.stderr;
end
(* iota n produces the list [0; 1; ...; n - 1] *)
let iota n = forall [] cons 0 n identity
(* interval a b produces the list [a; a + 1; ...; b - 1] *)
let interval a b = List.map ((+) a) (iota (b - a))
(*
* freeze a function, i.e., compute it only once on demand, and
* cache it into an array.
*)
let array n f =
let a = Array.init n (fun i -> lazy (f i))
in fun i -> Lazy.force a.(i)
let rec take n l =
match (n, l) with
(0, _) -> []
| (n, (a :: b)) -> a :: (take (n - 1) b)
| _ -> failwith "take"
let rec drop n l =
match (n, l) with
(0, _) -> l
| (n, (_ :: b)) -> drop (n - 1) b
| _ -> failwith "drop"
let either a b =
match a with
Some x -> x
| _ -> b

View File

@@ -0,0 +1,49 @@
(*
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*)
val invmod : int -> int -> int
val gcd : int -> int -> int
val lowest_terms : int -> int -> int * int
val find_generator : int -> int
val pow_mod : int -> int -> int -> int
val forall : 'a -> ('b -> 'a -> 'a) -> int -> int -> (int -> 'b) -> 'a
val sum_list : int list -> int
val max_list : int list -> int
val min_list : int list -> int
val count : ('a -> bool) -> 'a list -> int
val remove : 'a -> 'a list -> 'a list
val for_list : 'a list -> ('a -> unit) -> unit
val rmap : 'a list -> ('a -> 'b) -> 'b list
val cons : 'a -> 'a list -> 'a list
val null : 'a list -> bool
val (@@) : ('a -> 'b) -> ('c -> 'a) -> 'c -> 'b
val forall_flat : int -> int -> (int -> 'a list) -> 'a list
val identity : 'a -> 'a
val minimize : ('a -> 'b) -> 'a list -> 'a option
val find_elem : ('a -> bool) -> 'a list -> 'a option
val suchthat : int -> (int -> bool) -> int
val info : string -> unit
val iota : int -> int list
val interval : int -> int -> int list
val array : int -> (int -> 'a) -> int -> 'a
val take : int -> 'a list -> 'a list
val drop : int -> 'a list -> 'a list
val either : 'a option -> 'a -> 'a

View File

@@ -0,0 +1,108 @@
(*
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*)
type variable =
(* temporary variables generated automatically *)
| Temporary of int
(* memory locations, e.g., array elements *)
| Locative of (Unique.unique * Unique.unique *
(int -> string) * int * string)
(* constant values, e.g., twiddle factors *)
| Constant of (Unique.unique * string)
let hash v = Hashtbl.hash v
let same a b = (a == b)
let is_constant = function
| Constant _ -> true
| _ -> false
let is_temporary = function
| Temporary _ -> true
| _ -> false
let is_locative = function
| Locative _ -> true
| _ -> false
let same_location a b =
match (a, b) with
| (Locative (location_a, _, _, _, _), Locative (location_b, _, _, _, _)) ->
Unique.same location_a location_b
| _ -> false
let same_class a b =
match (a, b) with
| (Locative (_, class_a, _, _, _), Locative (_, class_b, _, _, _)) ->
Unique.same class_a class_b
| (Constant (class_a, _), Constant (class_b, _)) ->
Unique.same class_a class_b
| _ -> false
let make_temporary =
let tmp_count = ref 0
in fun () -> begin
tmp_count := !tmp_count + 1;
Temporary !tmp_count
end
let make_constant class_token name =
Constant (class_token, name)
let make_locative location_token class_token name i vs =
Locative (location_token, class_token, name, i, vs)
let vstride_of_locative = function
| Locative (_, _, _, _, vs) -> vs
| _ -> failwith "vstride_of_locative"
(* special naming conventions for variables *)
let rec base62_of_int k =
let x = k mod 62
and y = k / 62 in
let c =
if x < 10 then
Char.chr (x + Char.code '0')
else if x < 36 then
Char.chr (x + Char.code 'a' - 10)
else
Char.chr (x + Char.code 'A' - 36)
in
let s = String.make 1 c in
let r = if y == 0 then "" else base62_of_int y in
r ^ s
let varname_of_int k =
if !Magic.compact then
base62_of_int k
else
string_of_int k
let unparse = function
| Temporary k -> "T" ^ (varname_of_int k)
| Constant (_, name) -> name
| Locative (_, _, name, i, _) -> name i
let unparse_for_alignment m = function
| Locative (_, _, name, i, _) -> name (i mod m)
| _ -> failwith "unparse_for_alignment"

View File

@@ -0,0 +1,38 @@
(*
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*)
type variable
val hash : variable -> int
val same : variable -> variable -> bool
val is_constant : variable -> bool
val is_temporary : variable -> bool
val is_locative : variable -> bool
val same_location : variable -> variable -> bool
val same_class : variable -> variable -> bool
val make_temporary : unit -> variable
val make_constant : Unique.unique -> string -> variable
val make_locative :
Unique.unique -> Unique.unique -> (int -> string) ->
int -> string -> variable
val unparse : variable -> string
val unparse_for_alignment : int -> variable -> string
val vstride_of_locative : variable -> string