<?xml version="1.0" encoding="utf-8"?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom"><channel><title>Activity for sleef</title><link>https://sourceforge.net/p/sleef/activity/</link><description>Recent activity for sleef</description><language>en</language><lastBuildDate>Tue, 28 Jan 2025 09:36:02 -0000</lastBuildDate><item><title>sleef released /3.8/SLEEF 3.8 source code.zip</title><link>https://sourceforge.net/projects/sleef/files/3.8/SLEEF%203.8%20source%20code.zip/download</link><description/><dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">sleef</dc:creator><pubDate>Tue, 28 Jan 2025 09:36:02 -0000</pubDate><guid>https://sourceforge.net/projects/sleef/files/3.8/SLEEF 3.8 source code.zip/download</guid></item><item><title>sleef released /3.8/SLEEF 3.8 source code.tar.gz</title><link>https://sourceforge.net/projects/sleef/files/3.8/SLEEF%203.8%20source%20code.tar.gz/download</link><description/><dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">sleef</dc:creator><pubDate>Tue, 28 Jan 2025 09:36:02 -0000</pubDate><guid>https://sourceforge.net/projects/sleef/files/3.8/SLEEF 3.8 source code.tar.gz/download</guid></item><item><title>sleef released /3.8/README.md</title><link>https://sourceforge.net/projects/sleef/files/3.8/README.md/download</link><description/><dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">sleef</dc:creator><pubDate>Tue, 28 Jan 2025 09:36:02 -0000</pubDate><guid>https://sourceforge.net/projects/sleef/files/3.8/README.md/download</guid></item><item><title>sleef updated /3.7/README.md</title><link>https://sourceforge.net/projects/sleef/files/3.7/README.md/download</link><description/><dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">sleef</dc:creator><pubDate>Wed, 18 Sep 2024 09:28:02 -0000</pubDate><guid>https://sourceforge.net/projects/sleef/files/3.7/README.md/download</guid></item><item><title>sleef released /3.7/README.md</title><link>https://sourceforge.net/projects/sleef/files/3.7/README.md/download</link><description/><dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">sleef</dc:creator><pubDate>Tue, 17 Sep 2024 16:50:03 -0000</pubDate><guid>https://sourceforge.net/projects/sleef/files/3.7/README.md/download</guid></item><item><title>sleef released /3.7/SLEEF 3.7 source code.zip</title><link>https://sourceforge.net/projects/sleef/files/3.7/SLEEF%203.7%20source%20code.zip/download</link><description/><dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">sleef</dc:creator><pubDate>Tue, 17 Sep 2024 16:50:03 -0000</pubDate><guid>https://sourceforge.net/projects/sleef/files/3.7/SLEEF 3.7 source code.zip/download</guid></item><item><title>sleef released /3.7/SLEEF 3.7 source code.tar.gz</title><link>https://sourceforge.net/projects/sleef/files/3.7/SLEEF%203.7%20source%20code.tar.gz/download</link><description/><dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">sleef</dc:creator><pubDate>Tue, 17 Sep 2024 16:50:03 -0000</pubDate><guid>https://sourceforge.net/projects/sleef/files/3.7/SLEEF 3.7 source code.tar.gz/download</guid></item><item><title>sleef released /3.6.1/SLEEF 3.6.1 source code.zip</title><link>https://sourceforge.net/projects/sleef/files/3.6.1/SLEEF%203.6.1%20source%20code.zip/download</link><description/><dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">sleef</dc:creator><pubDate>Mon, 10 Jun 2024 10:29:03 -0000</pubDate><guid>https://sourceforge.net/projects/sleef/files/3.6.1/SLEEF 3.6.1 source code.zip/download</guid></item><item><title>sleef released /3.6.1/SLEEF 3.6.1 source code.tar.gz</title><link>https://sourceforge.net/projects/sleef/files/3.6.1/SLEEF%203.6.1%20source%20code.tar.gz/download</link><description/><dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">sleef</dc:creator><pubDate>Mon, 10 Jun 2024 10:29:03 -0000</pubDate><guid>https://sourceforge.net/projects/sleef/files/3.6.1/SLEEF 3.6.1 source code.tar.gz/download</guid></item><item><title>sleef released /3.6.1/README.md</title><link>https://sourceforge.net/projects/sleef/files/3.6.1/README.md/download</link><description/><dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">sleef</dc:creator><pubDate>Mon, 10 Jun 2024 10:29:03 -0000</pubDate><guid>https://sourceforge.net/projects/sleef/files/3.6.1/README.md/download</guid></item><item><title>sleef released /sleef-3.5.1.tar.gz</title><link>https://sourceforge.nethttps%3A//sourceforge.net/projects/sleef/files/sleef-3.5.1.tar.gz/download</link><description/><dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">sleef</dc:creator><pubDate>Tue, 15 Sep 2020 03:26:05 -0000</pubDate><guid>https://sourceforge.net/projects/sleef/files/sleef-3.5.1.tar.gz/download</guid></item><item><title>Naoki Shibata posted a comment on discussion General Discussion</title><link>https://sourceforge.net/p/sleef/discussion/general/thread/d70f2c6171/?limit=25#dcbd</link><description>My pleasure. Please make a pull request on github for your patch.</description><dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">Naoki Shibata</dc:creator><pubDate>Tue, 03 Dec 2019 22:56:07 -0000</pubDate><guid>https://sourceforge.net/p/sleef/discussion/general/thread/d70f2c6171/?limit=25#dcbd</guid></item><item><title>Joseph Alvis posted a comment on discussion General Discussion</title><link>https://sourceforge.net/p/sleef/discussion/general/thread/d70f2c6171/?limit=25#6592</link><description>Hi, I just wanted to say thank you to the contributors of this library. What a gem! Thank you for sharing this freely and for all the time and effort you must have put into it. I just started porting a library I'm working on from windows to linux and I needed something to bridge the gap between the intel SVML intrinsics supported on MSVC and gcc. Not only does Sleef do that but it actually seems superior in several respects. Rarely does something just fit like a glove but now I want to update my...</description><dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">Joseph Alvis</dc:creator><pubDate>Tue, 03 Dec 2019 11:13:35 -0000</pubDate><guid>https://sourceforge.net/p/sleef/discussion/general/thread/d70f2c6171/?limit=25#6592</guid></item><item><title>Naoki Shibata posted a comment on discussion General Discussion</title><link>https://sourceforge.net/p/sleef/discussion/general/thread/363ad9b20c/?limit=25#a9fc</link><description>gcc does not have an option for using sleef via its automatic vectorizer. If you really need it, please send a request to the gcc mailing list.</description><dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">Naoki Shibata</dc:creator><pubDate>Mon, 25 Nov 2019 03:07:22 -0000</pubDate><guid>https://sourceforge.net/p/sleef/discussion/general/thread/363ad9b20c/?limit=25#a9fc</guid></item><item><title>bule posted a comment on discussion General Discussion</title><link>https://sourceforge.net/p/sleef/discussion/general/thread/363ad9b20c/?limit=25#639a</link><description>I can make use of the gnuabi library with LLVM or armclang by implicitly add -mveclib/-fsimdmath in each compiler on an aarch64 server. However, gcc seems do not have such options that can make use of the gnuabi library. I was not able to enable it after a long try and I also cannot supoort or example that who have done it with gcc on aarch64 on google. So, Does the libsleefgnuabi applicable with gcc on aarch64? If so Can anyone give me an example on how to use the library with gcc on aarch64?</description><dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">bule</dc:creator><pubDate>Mon, 25 Nov 2019 02:45:51 -0000</pubDate><guid>https://sourceforge.net/p/sleef/discussion/general/thread/363ad9b20c/?limit=25#639a</guid></item><item><title>sleef released /sleef-3.4.1.tar.gz</title><link>https://sourceforge.nethttps%3A//sourceforge.net/projects/sleef/files/sleef-3.4.1.tar.gz/download</link><description/><dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">sleef</dc:creator><pubDate>Tue, 01 Oct 2019 01:52:02 -0000</pubDate><guid>https://sourceforge.net/projects/sleef/files/sleef-3.4.1.tar.gz/download</guid></item><item><title>sleef released /sleef-3.4.0.tar.gz</title><link>https://sourceforge.nethttps%3A//sourceforge.net/projects/sleef/files/sleef-3.4.0.tar.gz/download</link><description/><dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">sleef</dc:creator><pubDate>Sun, 28 Apr 2019 07:24:02 -0000</pubDate><guid>https://sourceforge.net/projects/sleef/files/sleef-3.4.0.tar.gz/download</guid></item><item><title>sleef released /3.4.0/README.md</title><link>https://sourceforge.nethttps%3A//sourceforge.net/projects/sleef/files/3.4.0/README.md/download</link><description/><dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">sleef</dc:creator><pubDate>Sun, 28 Apr 2019 03:41:02 -0000</pubDate><guid>https://sourceforge.net/projects/sleef/files/3.4.0/README.md/download</guid></item><item><title>sleef released /3.4.0/Functions that return consistent results across platforms are added.zip</title><link>https://sourceforge.nethttps%3A//sourceforge.net/projects/sleef/files/3.4.0/Functions%2520that%2520return%2520consistent%2520results%2520across%2520platforms%2520are%2520added.zip/download</link><description/><dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">sleef</dc:creator><pubDate>Sun, 28 Apr 2019 03:41:02 -0000</pubDate><guid>https://sourceforge.net/projects/sleef/files/3.4.0/Functions%20that%20return%20consistent%20results%20across%20platforms%20are%20added.zip/download</guid></item><item><title>sleef released /3.4.0/Functions that return consistent results across platforms are added.tar.gz</title><link>https://sourceforge.nethttps%3A//sourceforge.net/projects/sleef/files/3.4.0/Functions%2520that%2520return%2520consistent%2520results%2520across%2520platforms%2520are%2520added.tar.gz/download</link><description/><dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">sleef</dc:creator><pubDate>Sun, 28 Apr 2019 03:41:02 -0000</pubDate><guid>https://sourceforge.net/projects/sleef/files/3.4.0/Functions%20that%20return%20consistent%20results%20across%20platforms%20are%20added.tar.gz/download</guid></item><item><title>nomoo posted a comment on discussion General Discussion</title><link>https://sourceforge.net/p/sleef/discussion/general/thread/d91955bc/?limit=25#8fed</link><description>Thanks a lot, Naoki!</description><dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">nomoo</dc:creator><pubDate>Wed, 29 Aug 2018 16:13:51 -0000</pubDate><guid>https://sourceforge.net/p/sleef/discussion/general/thread/d91955bc/?limit=25#8fed</guid></item><item><title>Naoki Shibata posted a comment on discussion General Discussion</title><link>https://sourceforge.net/p/sleef/discussion/general/thread/d91955bc/?limit=25#c7ed</link><description>Version 3.3.1 is now released.</description><dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">Naoki Shibata</dc:creator><pubDate>Wed, 22 Aug 2018 04:07:54 -0000</pubDate><guid>https://sourceforge.net/p/sleef/discussion/general/thread/d91955bc/?limit=25#c7ed</guid></item><item><title>sleef released /sleef-3.3.1.tar.gz</title><link>https://sourceforge.nethttps%3A//sourceforge.net/projects/sleef/files/sleef-3.3.1.tar.gz/download</link><description/><dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">sleef</dc:creator><pubDate>Wed, 22 Aug 2018 04:03:04 -0000</pubDate><guid>https://sourceforge.net/projects/sleef/files/sleef-3.3.1.tar.gz/download</guid></item><item><title>Naoki Shibata posted a comment on discussion General Discussion</title><link>https://sourceforge.net/p/sleef/discussion/general/thread/d91955bc/?limit=25#f8e2</link><description>I will soon release 3.3.1 which includes FreeBSD support.</description><dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">Naoki Shibata</dc:creator><pubDate>Mon, 20 Aug 2018 15:05:19 -0000</pubDate><guid>https://sourceforge.net/p/sleef/discussion/general/thread/d91955bc/?limit=25#f8e2</guid></item><item><title>nomoo posted a comment on discussion General Discussion</title><link>https://sourceforge.net/p/sleef/discussion/general/thread/d91955bc/?limit=25#5c65</link><description>Thanks, Naoki Well, we have a Machine Learning dept. and all our production is based on FreeBSD. We're using poudriere with custom ports and everything works like a charm.</description><dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">nomoo</dc:creator><pubDate>Mon, 20 Aug 2018 12:35:11 -0000</pubDate><guid>https://sourceforge.net/p/sleef/discussion/general/thread/d91955bc/?limit=25#5c65</guid></item><item><title>Naoki Shibata posted a comment on discussion General Discussion</title><link>https://sourceforge.net/p/sleef/discussion/general/thread/d91955bc/?limit=25#5433</link><description>I think @iotamudelta was working on that. https://github.com/shibatch/sleef/pull/205 I don't know how much progress he has made since then. Do you think there is demand for FreeBSD support? For desktop environment, the share of FreeBSD is very small nowadays. There could be demand by video game developers, though.</description><dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">Naoki Shibata</dc:creator><pubDate>Wed, 15 Aug 2018 16:40:00 -0000</pubDate><guid>https://sourceforge.net/p/sleef/discussion/general/thread/d91955bc/?limit=25#5433</guid></item><item><title>nomoo posted a comment on discussion General Discussion</title><link>https://sourceforge.net/p/sleef/discussion/general/thread/d91955bc/?limit=25#a10e</link><description>Hello. Are you going to support freebsd? I can help to make a port of sleef.</description><dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">nomoo</dc:creator><pubDate>Wed, 15 Aug 2018 16:27:24 -0000</pubDate><guid>https://sourceforge.net/p/sleef/discussion/general/thread/d91955bc/?limit=25#a10e</guid></item><item><title>sleef released /sleef-3.3.tar.gz</title><link>https://sourceforge.nethttps%3A//sourceforge.net/projects/sleef/files/sleef-3.3.tar.gz/download</link><description/><dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">sleef</dc:creator><pubDate>Fri, 06 Jul 2018 08:32:02 -0000</pubDate><guid>https://sourceforge.net/projects/sleef/files/sleef-3.3.tar.gz/download</guid></item><item><title>sleef released /sleef-3.2.tar.gz</title><link>https://sourceforge.nethttps%3A//sourceforge.net/projects/sleef/files/sleef-3.2.tar.gz/download</link><description/><dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">sleef</dc:creator><pubDate>Mon, 26 Feb 2018 09:22:04 -0000</pubDate><guid>https://sourceforge.net/projects/sleef/files/sleef-3.2.tar.gz/download</guid></item><item><title>sleef released /sleef-3.2.tar.gz</title><link>https://sourceforge.nethttps%3A//sourceforge.net/projects/sleef/files/sleef-3.2.tar.gz/download</link><description/><dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">sleef</dc:creator><pubDate>Mon, 26 Feb 2018 09:15:04 -0000</pubDate><guid>https://sourceforge.net/projects/sleef/files/sleef-3.2.tar.gz/download</guid></item><item><title>Naoki Shibata modified a comment on discussion General Discussion</title><link>https://sourceforge.net/p/sleef/discussion/general/thread/d258c9f2/?limit=25#f37a</link><description>It is decided when it is run. Please see : http://sleef.org/additional.xhtml#dispatcher</description><dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">Naoki Shibata</dc:creator><pubDate>Mon, 19 Feb 2018 13:15:49 -0000</pubDate><guid>https://sourceforge.net/p/sleef/discussion/general/thread/d258c9f2/?limit=25#f37a</guid></item><item><title>Naoki Shibata posted a comment on discussion General Discussion</title><link>https://sourceforge.net/p/sleef/discussion/general/thread/d258c9f2/?limit=25#f37a</link><description>It is decided when it is run. Please see : http://ito-lab.naist.jp/~n-sibata/sleef/additional.xhtml#dispatcher</description><dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">Naoki Shibata</dc:creator><pubDate>Mon, 19 Feb 2018 13:15:15 -0000</pubDate><guid>https://sourceforge.net/p/sleef/discussion/general/thread/d258c9f2/?limit=25#f37a</guid></item><item><title>Royi modified a comment on discussion General Discussion</title><link>https://sourceforge.net/p/sleef/discussion/general/thread/d258c9f2/?limit=100#0a0d</link><description>Is it on Run Time or decided at Compile Time? Thank You.</description><dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">Royi</dc:creator><pubDate>Mon, 19 Feb 2018 13:11:54 -0000</pubDate><guid>https://sourceforge.net/p/sleef/discussion/general/thread/d258c9f2/?limit=100#0a0d</guid></item><item><title>Royi posted a comment on discussion General Discussion</title><link>https://sourceforge.net/p/sleef/discussion/general/thread/d258c9f2/?limit=100#0a0d</link><description>Is it on Run Time or decided at compile time? Thank You.</description><dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">Royi</dc:creator><pubDate>Mon, 19 Feb 2018 13:06:14 -0000</pubDate><guid>https://sourceforge.net/p/sleef/discussion/general/thread/d258c9f2/?limit=100#0a0d</guid></item><item><title>Naoki Shibata posted a comment on discussion General Discussion</title><link>https://sourceforge.net/p/sleef/discussion/general/thread/d258c9f2/?limit=25#cf7c</link><description>Hello, SLEEF has a dispatcher that automatically detects available vector extensions and chooses the best one. You can just call Sleef_sind2_u10, for example, and it automatically use AVX2 if available.</description><dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">Naoki Shibata</dc:creator><pubDate>Mon, 19 Feb 2018 12:30:00 -0000</pubDate><guid>https://sourceforge.net/p/sleef/discussion/general/thread/d258c9f2/?limit=25#cf7c</guid></item><item><title>Royi posted a comment on discussion General Discussion</title><link>https://sourceforge.net/p/sleef/discussion/general/thread/d258c9f2/?limit=100#072d</link><description>Hello, I was wondering, how does Sleef decides which Instruction Set to use? For instance, if I call it on __m256 will AVX be automatically used? If yet, will it use AVX or AVX2? Namely, just more information. I for once would be happy for a mode where __m128 goes to SSE4 Code Path and __m256 goes to AVX2 code path. Could that be configureable?</description><dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">Royi</dc:creator><pubDate>Mon, 19 Feb 2018 12:20:19 -0000</pubDate><guid>https://sourceforge.net/p/sleef/discussion/general/thread/d258c9f2/?limit=100#072d</guid></item><item><title>Royi posted a comment on discussion General Discussion</title><link>https://sourceforge.net/p/sleef/discussion/general/thread/2efa313b/?limit=100#832b</link><description>Hello, I want to try Sleef on my project. The problem is I can't follow installation instructions on Windws. I follow this page: http://sleef.org/compile.xhtml I have Visula Sudio 2017 with CMAKE module installed. Yet cmake isn't available on. Moreover, I don't get why there are 3 sections. What's the different between the Quick Start and Compiling on Windows? Which should I follow? On another note, when I call Sleef operation on __m256 does it neceseraly use AVX? Namely what's the connection, in...</description><dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">Royi</dc:creator><pubDate>Mon, 19 Feb 2018 11:48:30 -0000</pubDate><guid>https://sourceforge.net/p/sleef/discussion/general/thread/2efa313b/?limit=100#832b</guid></item><item><title>Naoki Shibata posted a comment on discussion General Discussion</title><link>https://sourceforge.net/p/sleef/discussion/general/thread/bed4208e/?limit=25#f0af</link><description>David, We are now talking about what we should implement. Please join our discussion. https://github.com/shibatch/sleef/issues/47</description><dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">Naoki Shibata</dc:creator><pubDate>Wed, 09 Aug 2017 08:01:28 -0000</pubDate><guid>https://sourceforge.net/p/sleef/discussion/general/thread/bed4208e/?limit=25#f0af</guid></item><item><title>David Parks modified a comment on discussion General Discussion</title><link>https://sourceforge.net/p/sleef/discussion/general/thread/bed4208e/?limit=25#27f5</link><description>I don't want to make this too difficult. I'm wondering if something along the lines of (for single precision): if (x86-64) FMA3 is available (would also need an FMA4 version) define mlaf to be: float mlaf(float x, float y, float z) { asm ("vfmadd132ss %2, %1, %0" : "+r"(x) : "x"(z), "x"(y) :); return x; } Would also need to have the negative flavor of the FMA operation. But this does come back to one of my original questions, if there was an FMA imlementation of mlaf for scalar, there still is one...</description><dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">David Parks</dc:creator><pubDate>Tue, 08 Aug 2017 17:32:47 -0000</pubDate><guid>https://sourceforge.net/p/sleef/discussion/general/thread/bed4208e/?limit=25#27f5</guid></item><item><title>David Parks modified a comment on discussion General Discussion</title><link>https://sourceforge.net/p/sleef/discussion/general/thread/bed4208e/?limit=25#27f5</link><description>I don't want to make this too difficult. I'm wondering if something along the lines of (for single precision): if (x86-64) FMA3 is available (would also need an FMA4 version) define mlaf to be: float mlaf(float x, float y, float z) { asm ("vfmadd132ss %2, %1, %0" : "=rx"(x) : "x"(z), "x"(y) :); return x; } Would also need to have the negative flavor of the FMA operation. But this does come back to one of my original questions, if there was an FMA imlementation of mlaf for scalar, there still is one...</description><dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">David Parks</dc:creator><pubDate>Tue, 08 Aug 2017 16:54:47 -0000</pubDate><guid>https://sourceforge.net/p/sleef/discussion/general/thread/bed4208e/?limit=25#27f5</guid></item><item><title>Naoki Shibata posted a comment on discussion General Discussion</title><link>https://sourceforge.net/p/sleef/discussion/general/thread/bed4208e/?limit=25#dea8</link><description>It would be harder to maintain the scalar version in a way that it produces the same results as the vectorized version. There was absolutely no intention to use the scalar version in that way, and there are so many subtle differences. Writing new helper files is not hard and easier to maintain.</description><dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">Naoki Shibata</dc:creator><pubDate>Tue, 08 Aug 2017 15:48:13 -0000</pubDate><guid>https://sourceforge.net/p/sleef/discussion/general/thread/bed4208e/?limit=25#dea8</guid></item><item><title>David Parks posted a comment on discussion General Discussion</title><link>https://sourceforge.net/p/sleef/discussion/general/thread/bed4208e/?limit=25#27f5</link><description>I don't want to make this too difficult. I'm wondering if something along the lines of (for single precision): if (x86-64) FMA3 is available (would also need an FMA4 version) define mlaf to be: float mlaf(float x, float y, float z) { asm ("vfmadd123ss %2, %1, %0" : "=rx"(x) : "x"(y), "x"(z) :); return x; } Would also need to have the negative flavor of the FMA operation. But this does come back to one of my original questions, if there was an FMA imlementation of mlaf for scalar, there still is one...</description><dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">David Parks</dc:creator><pubDate>Tue, 08 Aug 2017 15:41:41 -0000</pubDate><guid>https://sourceforge.net/p/sleef/discussion/general/thread/bed4208e/?limit=25#27f5</guid></item><item><title>Naoki Shibata posted a comment on discussion General Discussion</title><link>https://sourceforge.net/p/sleef/discussion/general/thread/bed4208e/?limit=25#d1e6</link><description>It is possible that I write a new helper file for scalar calculation. Does that satisfy your requirements?</description><dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">Naoki Shibata</dc:creator><pubDate>Tue, 08 Aug 2017 13:22:37 -0000</pubDate><guid>https://sourceforge.net/p/sleef/discussion/general/thread/bed4208e/?limit=25#d1e6</guid></item><item><title>David Parks posted a comment on discussion General Discussion</title><link>https://sourceforge.net/p/sleef/discussion/general/thread/bed4208e/?limit=25#8773/052f</link><description>Hi, What would be most desirable is that for the scalar and vector versions of a function produce the same numerical results on a particular processor. Ignoring the DD functions, if we use FMA for a term with the vector implementation, it is probably best to use FMA for the corresponding term in the scalar version of the intrinsic. Again, thank you for your time and consideration. Best regards, Dave</description><dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">David Parks</dc:creator><pubDate>Tue, 08 Aug 2017 13:17:20 -0000</pubDate><guid>https://sourceforge.net/p/sleef/discussion/general/thread/bed4208e/?limit=25#8773/052f</guid></item><item><title>Naoki Shibata posted a comment on discussion General Discussion</title><link>https://sourceforge.net/p/sleef/discussion/general/thread/bed4208e/?limit=25#8773</link><description>On some architectures, non-fused combined multiplication and addition is available. On architectures with fma, it is safe to assume that fma is faster than any other combination of mulciplication and addition. In order to speed up calculation, mla to fma conversion is enabled. I still don't undertstand your requirements, but is it okay to just make functions that do not use DD calculation to produce the same numerical results? For those functions with DD calculations, there is no way to make them...</description><dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">Naoki Shibata</dc:creator><pubDate>Tue, 08 Aug 2017 11:47:35 -0000</pubDate><guid>https://sourceforge.net/p/sleef/discussion/general/thread/bed4208e/?limit=25#8773</guid></item><item><title>David Parks posted a comment on discussion General Discussion</title><link>https://sourceforge.net/p/sleef/discussion/general/thread/bed4208e/?limit=25#04fd</link><description>Dear Shibata-san, Thank you for your response, but I don't think I explained our concern properly. We are fully aware of the delicate nuances of FMA arithmetic - like when is B=A*A; C=A^2-B != 0 :-) But, if I understand your explanation, it is a conscience decision that the scalar and vector algorithms may not produce the same numerical results. Going back to the two routines in the thread I used as examples, the "mlaf" in the scalar version does not use FMA arithmetic because of contraction being...</description><dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">David Parks</dc:creator><pubDate>Tue, 08 Aug 2017 11:38:43 -0000</pubDate><guid>https://sourceforge.net/p/sleef/discussion/general/thread/bed4208e/?limit=25#04fd</guid></item><item><title>Naoki Shibata posted a comment on discussion General Discussion</title><link>https://sourceforge.net/p/sleef/discussion/general/thread/5b641594/?limit=25#520f</link><description>Ah yes, line 178 is a bug. With this bug, FMA4 won't be selected by the dispatcher.</description><dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">Naoki Shibata</dc:creator><pubDate>Tue, 08 Aug 2017 11:24:35 -0000</pubDate><guid>https://sourceforge.net/p/sleef/discussion/general/thread/5b641594/?limit=25#520f</guid></item><item><title>David Parks posted a comment on discussion General Discussion</title><link>https://sourceforge.net/p/sleef/discussion/general/thread/5b641594/?limit=25#9da7/67aa</link><description>Hi, Not to be argumentative, but I believe that on line 178, without the parentheses around ENABLEFMA4, the expansion is equivalent to $(E)NABLEFMA4. Best regards, Dave</description><dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">David Parks</dc:creator><pubDate>Tue, 08 Aug 2017 11:20:09 -0000</pubDate><guid>https://sourceforge.net/p/sleef/discussion/general/thread/5b641594/?limit=25#9da7/67aa</guid></item><item><title>Naoki Shibata posted a comment on discussion General Discussion</title><link>https://sourceforge.net/p/sleef/discussion/general/thread/5b641594/?limit=25#9da7</link><description>Thank you for pointing out. This is not a bug, since ENABLEFMA4 variable is not defined if FMA4 is not supported by the compiler. We know that the current makefile is messy and confusing. We are now working on migrating the build system to cmake, which should be tidier.</description><dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">Naoki Shibata</dc:creator><pubDate>Tue, 08 Aug 2017 07:23:19 -0000</pubDate><guid>https://sourceforge.net/p/sleef/discussion/general/thread/5b641594/?limit=25#9da7</guid></item><item><title>Naoki Shibata posted a comment on discussion General Discussion</title><link>https://sourceforge.net/p/sleef/discussion/general/thread/bed4208e/?limit=25#531b</link><description>Hi David, FMA is not just a combination of multiplication and addition, but those two operations are fused. https://en.wikipedia.org/wiki/Multiply%E2%80%93accumulate_operation#Fused_multiply.E2.80.93add The property of FMA is exploited to speed up computation of double-double arithmetic. https://en.wikipedia.org/wiki/Quadruple-precision_floating-point_format#Double-double_arithmetic Because of this, it is impossible to make FMA and non-FMA implementation produce exactly the same values. SLEEF defines...</description><dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">Naoki Shibata</dc:creator><pubDate>Tue, 08 Aug 2017 03:10:40 -0000</pubDate><guid>https://sourceforge.net/p/sleef/discussion/general/thread/bed4208e/?limit=25#531b</guid></item><item><title>David Parks posted a comment on discussion General Discussion</title><link>https://sourceforge.net/p/sleef/discussion/general/thread/5b641594/?limit=25#a999</link><description>Hi, I think that there is a typo on lines 37-39 of the src/libm/Makefile: ifdef ENABLEFMA4 OBJ+=sleefdpfma4.o sleefspfma4.o endif I believe those lies should be: ifeq ($(ENABLEFMA4), 1) OBJ+= endif A similar error occurs on line 178, should be: ifeq ($(ENABLEFMA4), 1) Best regards, Dave</description><dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">David Parks</dc:creator><pubDate>Mon, 07 Aug 2017 22:23:19 -0000</pubDate><guid>https://sourceforge.net/p/sleef/discussion/general/thread/5b641594/?limit=25#a999</guid></item><item><title>David Parks posted a comment on discussion General Discussion</title><link>https://sourceforge.net/p/sleef/discussion/general/thread/bed4208e/?limit=25#e8e1</link><description>Hello, We have a requirement to support numerical intrinsics across various hardware architectures and have found using SLEEF's LIBM a strong possibility with its both scalar and vector (SIMD) implementations of all the core routines. But we have a need that the scalar and vector routines generate the same numerical results. Of immediate concern is that it appears that some of the scalar and vector implementations of the same routine have (possibly) different operations. For example, the scalar version...</description><dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">David Parks</dc:creator><pubDate>Mon, 07 Aug 2017 21:37:02 -0000</pubDate><guid>https://sourceforge.net/p/sleef/discussion/general/thread/bed4208e/?limit=25#e8e1</guid></item><item><title>Naoki Shibata posted a comment on discussion General Discussion</title><link>https://sourceforge.net/p/sleef/discussion/general/thread/03a19c77/?limit=25#4874</link><description>SLEEF implements a mechanism to select coefficients according to the argument of kernel functions in a few functions. In order to do so, the default generic method uses multiple blending functions, but for AVX2, permutation is used for faster computation. I tried a similar thing using tbl instruction for AArch64, but I found it is actually slower than the generic method. Today I found an article describing a similar experience. http://www.cnx-software.com/2017/08/07/how-arm-nerfed-neon-permute-i...</description><dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">Naoki Shibata</dc:creator><pubDate>Mon, 07 Aug 2017 16:27:21 -0000</pubDate><guid>https://sourceforge.net/p/sleef/discussion/general/thread/03a19c77/?limit=25#4874</guid></item><item><title>Naoki Shibata posted a comment on discussion General Discussion</title><link>https://sourceforge.net/p/sleef/discussion/general/thread/9e73a8c9/?limit=25#c7f8</link><description>Do not hesitate to ask questions.</description><dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">Naoki Shibata</dc:creator><pubDate>Mon, 24 Jul 2017 02:29:54 -0000</pubDate><guid>https://sourceforge.net/p/sleef/discussion/general/thread/9e73a8c9/?limit=25#c7f8</guid></item><item><title>sleef released /sleef-3.1.tar.gz</title><link>https://sourceforge.nethttps%3A//sourceforge.net/projects/sleef/files/sleef-3.1.tar.gz/download</link><description/><dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">sleef</dc:creator><pubDate>Wed, 19 Jul 2017 09:51:02 -0000</pubDate><guid>https://sourceforge.net/projects/sleef/files/sleef-3.1.tar.gz/download</guid></item><item><title>Naoki Shibata posted a comment on discussion General Discussion</title><link>https://sourceforge.net/p/sleef/discussion/general/thread/72f659b2/?limit=25#2f6a</link><description>Still testing</description><dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">Naoki Shibata</dc:creator><pubDate>Tue, 11 Jul 2017 17:53:10 -0000</pubDate><guid>https://sourceforge.net/p/sleef/discussion/general/thread/72f659b2/?limit=25#2f6a</guid></item><item><title>Naoki Shibata ( 柴田 直樹 ) posted a comment on discussion General Discussion</title><link>https://sourceforge.net/p/sleef/discussion/general/thread/72f659b2/?limit=25#2f48</link><description>This is for testing</description><dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">Naoki Shibata ( 柴田 直樹 )</dc:creator><pubDate>Tue, 11 Jul 2017 09:28:44 -0000</pubDate><guid>https://sourceforge.net/p/sleef/discussion/general/thread/72f659b2/?limit=25#2f48</guid></item><item><title>sleef released /sleef-3.0.tar.gz</title><link>https://sourceforge.nethttps%3A//sourceforge.net/projects/sleef/files/sleef-3.0.tar.gz/download</link><description/><dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">sleef</dc:creator><pubDate>Tue, 11 Jul 2017 08:13:03 -0000</pubDate><guid>https://sourceforge.net/projects/sleef/files/sleef-3.0.tar.gz/download</guid></item></channel></rss>