diff options
author | ryoon <ryoon@pkgsrc.org> | 2020-07-04 05:02:14 +0000 |
---|---|---|
committer | ryoon <ryoon@pkgsrc.org> | 2020-07-04 05:02:14 +0000 |
commit | a0af5fc10183f7f767699e29ded60f42c0025369 (patch) | |
tree | af97d5e1df45b99f4826ce28920c17231117cbcf /misc/libreoffice/patches | |
parent | 4f94fd3d40e8642922413d335d1fc215493ff1de (diff) | |
download | pkgsrc-a0af5fc10183f7f767699e29ded60f42c0025369.tar.gz |
libreoffice: Update to 6.4.5.2
Changelog:
6.4.5.2
Bugs fixed compared to 6.4.5 rc1:
ofz#20904 check bounds [Caolán McNamara]
tdf#83309 FILEOPEN: NUMBERING: text paragraph indentation/tab stops in .DOCX displayed incorrectly with tab [Vasily Melenchuk]
tdf#120394 FILEOPEN: Numbers appear in numbered list MSO created in attached DOC without visible number format [Vasily Melenchuk]
tdf#121384 Crash of Writer opening any document with "invalid" python file in home directory (linux) [Caolán McNamara]
tdf#130999 Greek dictionary cannot be loaded on Collabora Online (4.2.0.4) [Andras Timar]
tdf#131353 Core build fails with poppler >= 0.86.0 [Martin Whitaker]
tdf#131357 Help files lack syntax definition for some Calc functions [Steve Fanning]
tdf#132093 Editing snap lines: dialog decreases entered values when drawing scale is 1:many [Stephan Bergmann]
tdf#132254 When copying a block/rectangle, the pasted lines are in reverse order [Michael Stahl]
tdf#132321 Deleting a paragraph at start or end of section deletes the object anchored to it [Michael Stahl]
tdf#132326 CRASH: undoing paste of frames [Michael Stahl]
tdf#132431 Crash: "vector::_M_fill_insert" with large sumifs [Luboš Luňák]
tdf#132597 Crash in: swlo.dll [Michael Stahl]
tdf#132725 Crash in: BigPtrArray::Index2Block(unsigned __int64) anchor to paragraph [Michael Stahl]
tdf#132744 Images anchored to character in table not pasted [Michael Stahl]
tdf#132754 DOCX import: invalid list start numbering with 0.0.0. [Vasily Melenchuk]
tdf#133326 Crash after redo (steps in comment 5) [Noel Grandin]
tdf#133629 FORMATTING: Calc crashes on setting borders to all cells on more than one sheet simultaneously [Noel Grandin]
tdf#133641 CRASH: after clicking on a drop-down form [Michael Stahl]
tdf#133862 Crash when trying to customize toolbar during embedded Equation editing [Caolán McNamara]
tdf#133981 CRASH: Undoing deletion of selection [Michael Stahl]
tdf#133982 CRASH: copying content in nested table [Michael Stahl]
tdf#133985 "Merge and Center Cells" dialog: default (active) button is "Cancel" [Caolán McNamara]
tdf#133990 CRASH: Undoing table deletion [Michael Stahl]
tdf#134019 Calc FILEOPEN Crash in: sclo.dll [Caolán McNamara]
tdf#134021 Crash swlo!SwFrame::CheckPageDescs+0x12d6: [Michael Stahl]
tdf#134023 Select the table by dragging over the columns become harder [Justin Luth]
tdf#134243 Hang launching mailmerge wizard with a specific file [Caolán McNamara]
6.4.5.1
Bugs fixed compared to 6.4.4 RC2:
deb#961473 set A4 in extras/source/shellnew templates [Rene Engelhard]
rhbz#1844263 unchecked nullptr return from SfxViewFrame::Current() [Caolán McNamara]
tdf#37268 Data Pilot and sheet local defined named range insert menu entry inactive [Julien Nabet]
tdf#64690 EDITING: Process hang on find/replace in Basic code involving "\&" [Andreas Heinisch]
tdf#78352 FILEOPEN: DOCX - Incorrect space between bullet and first line text due to wrong tab stop at value [Vasily Melenchuk]
tdf#79082 FILESAVE: Tab positions not being retained in PPT and being lost in PPTX [Piet van Oostrum, Samuel Mehrbrodt]
tdf#79998 FILESAVE: XLSX export breaks or some sheet names (sheet name length >31 characters) [Serge Krot]
tdf#94628 Fileopen: Docx custom numbering triangle symbol incorrectly displayed as a dot [Vasily Melenchuk]
tdf#95189 Paragraph numbering on one paragraph resets when it shouldn't [Vasily Melenchuk]
tdf#97416 Editing: Database password properties in ODB file not re-settable via UI [Julien Nabet]
tdf#97694 Base macros cannot be digitally signed [Samuel Mehrbrodt]
tdf#101856 MAILMERGE Add conditional to expand / collapse bookmarks [Serge Krot]
tdf#102625 Formula editor breaks surrogate pairs [Julien Nabet]
tdf#103602 new documents fail ODF validation with Error: unexpected attribute "draw:fill" [Michael Stahl]
tdf#104017 FILESAVE: DOC: New rows are displayed on table after RT [Justin Luth]
tdf#106181 FILESAVE: Check boxes get lost when saving as .XLSX [Serge Krot]
tdf#108496 FILEOPEN: DOCX file numbered list restarted from 1 following section break assumes previous numbered list sequence after 1 [Vasily Melenchuk]
tdf#114258 BASIC: Bad select after scrolling in Object Catalog [Caolán McNamara]
tdf#116883 FILEOPEN DOCX Numbering format shown as "1.1.1-" even though it should be "1-1-1-" [Vasily Melenchuk]
tdf#120394 FILEOPEN: Numbers appear in numbered list MSO created in attached DOC without visible number format [Vasily Melenchuk]
tdf#122408 linked subform shows empty when slave field is a function call [Lionel Elie Mamane]
tdf#122461 SQL parser doesn't accept identifiers containing newlines [and gives a syntax error when Query Wizard correctly generates a query containing one] [Lionel Elie Mamane]
tdf#123801 Small caps captions do not display the Figure/Illustration/etc. category label [Caolán McNamara]
tdf#124790 Mailmerge: Background image in printed address labels not visible [Miklos Vajna]
tdf#125609 radiobutton receives item status changed event after listbox value selection [Noel Grandin]
tdf#125981 The "Default" button for quotes in AutoCorrect options doesn't reset the display of replacement [Caolán McNamara]
tdf#126025 Extrusion on/off is faulty in saving for Fontwork favorite 40 [Stephan Bergmann]
tdf#126468 Unable to deselect 'Visible' flags in Base query, if field is set for "sorting" [Julien Nabet]
tdf#127778 FILEOPEN DOCX Margin setting is not considered if smaller than heading size [Miklos Vajna]
tdf#127889 Can't unmerge cells using tabbed user interface [andreas kainz]
tdf#128665 EDITING: Undo takes action on another shape [Vasily Melenchuk]
tdf#128689 Request to Localize More Formatting Icons into Korean UI [Rizal Muttaqin]
tdf#129620 Crash when I press down arrow in edit header dialog using e.g, Japanese keyboard [Caolán McNamara]
tdf#129702 [UI] Smart tags wrongly listed in AutoCorrect options dialog [Caolán McNamara]
tdf#130354 Cannot sign existing PDF, ' .tmp does not exist' from 6.4 [Miklos Vajna]
tdf#130479 Inlined BMP fills do not render [Miklos Vajna]
tdf#130770 Autofilter not updated on data change (automatically or manually) [scito]
tdf#130792 Draw crashes when trying to add a point in a multiline ( steps in comment 8 ) [Noel Grandin]
tdf#131076 UI: Text import dialogue default focus is 'Character set' instead of 'OK' [Caolán McNamara]
tdf#131333 Cannot change data point icons in Calc's XY or points and lines chart [Caolán McNamara]
tdf#131366 Crash when I type the letter "i" after the letter "f" in a paragraph with style "emphasis" and font "Linux Biolinum G" [Gabor Kelemen]
tdf#131423 Function Wizard in Calc allows too many parameters [Eike Rathke]
tdf#131684 Crash when doing undoing [Xisco Fauli, Michael Stahl]
tdf#132169 Drawing line width changes randomly while changing [Caolán McNamara]
tdf#132173 Function Wizard does not enter array formulas with inline arrays correctly [Eike Rathke]
tdf#132187 For every repeated paste the page count (incl. content) doubles [Xisco Fauli]
tdf#132236 Crash in: swlo.dll ( steps in comment 12) [Michael Stahl]
tdf#132267 Scrollers for long menus are not shown / black [Miklos Vajna]
tdf#132514 FILEOPEN DOCX Table character direct formatting not applied with another table in footer [László Németh]
tdf#132594 No display of labels in pie charts if xlsx created with Excel 2007 (regression in 6.4 vs 6.0 and 5.2) [Tünde Tóth]
tdf#132596 Crash in: SwPosition::SwPosition(SwPosition const &) [Michael Stahl]
tdf#132623 Custom properties dialog contents don't expand to fill the dialog when resized [andreas kainz]
tdf#132626 CAPTION DIALOG: Can't type dot in caption category […]
[…] running mail merge wizard for doc from tdf#119942 [Caolán McNamara]
tdf#132730 Assertion if apply Ctrl+Shift+M on group [Julien Nabet]
tdf#132732 Restart Numbering in Paragraph with chosen Number impossible (GTK3) [Caolán McNamara]
tdf#13273… […] [Miklos Vajna]
tdf#132814 EDITING: Impossible to change an existing Integer primarykey in MySQL/MariaDB to AutoValue in GUI [Julien Nabet]
tdf#132832 XLayoutConstrains.calcAdjustedSize doesn't return proper size for multiline checkbox and hyperlink […]
[…] elementary, Sifr, Sukapura: Wrong folder action icons in open dialog with gen backend [Rizal Muttaqin]
tdf#133012 elementary: The gripper icon looks like a “More Options” menu [Rizal Muttaqin]
tdf#133036 CRASH: customizing notebookbar (gen) [Caolán McNamara]
tdf#133053 Crash in: x11::SelectionManager::handleDragEvent [Caolán McNamara]
tdf#133250 Colibre: Inconsistent Icon for Red, Blue and Green [Rizal Muttaqin]
tdf#133267 Undo inserting a row above extremely slow [Luboš Luňák]
tdf#133270 MAILMERGE progress is not updated during export of many files [Serge Krot]
tdf#133271 FILESAVE: Vertical text saves as horizontal to ODT (but OK to DOCX) [Miklos Vajna]
tdf#133348 Crash when using undo 3 times in comment reply [Julien Nabet]
tdf#133411 Replacing lots of cell styles consumes too much memory/CPU [Caolán McNamara]
tdf#133426 Crash attempting to save mail merge to file multiple times in parallel [Michael Weghorn]
tdf#133474 FILEOPEN: DOC: Images are not loaded [Miklos Vajna]
tdf#133595 FILESAVE XLSX Excel gives invalid content error when opening file with shape [Serge Krot]
tdf#133604 FILESAVE: DOCX: fields are not displayed in MSO after RT [Michael Stahl]
tdf#133633 LO crashes with SuperNova Magnifier & ScreenReader [Michael Weghorn]
Diffstat (limited to 'misc/libreoffice/patches')
-rw-r--r-- | misc/libreoffice/patches/patch-configure.ac | 15 | ||||
-rw-r--r-- | misc/libreoffice/patches/patch-download.lst | 12 | ||||
-rw-r--r-- | misc/libreoffice/patches/patch-sc_source_core_opencl_formulagroupcl.cxx | 798 |
3 files changed, 8 insertions, 817 deletions
diff --git a/misc/libreoffice/patches/patch-configure.ac b/misc/libreoffice/patches/patch-configure.ac index ece01e8f435..77f50176e00 100644 --- a/misc/libreoffice/patches/patch-configure.ac +++ b/misc/libreoffice/patches/patch-configure.ac @@ -1,10 +1,8 @@ -$NetBSD: patch-configure.ac,v 1.15 2020/02/05 14:10:35 ryoon Exp $ +$NetBSD: patch-configure.ac,v 1.16 2020/07/04 05:02:14 ryoon Exp $ * Add NetBSD/aarch64 support. * Disable freetype2 and fontconfig from pkgsrc explicitly. to fix X11_TYPE=native build and internal pdfium's internal freetype build. -* Exclude c++2a to avoid some C++ errors. And c++17 causes alloca(3) problem. - Use gnu++17 dialect instead. * Fix some static links. * Include header files from gpgme correctly. * Find pkgsrc/devel/apache-ant. @@ -31,15 +29,6 @@ $NetBSD: patch-configure.ac,v 1.15 2020/02/05 14:10:35 ryoon Exp $ if test "$test_fontconfig" = "yes"; then PKG_CHECK_MODULES([FONTCONFIG], [fontconfig >= 2.4.1]) SYSTEM_FONTCONFIG=TRUE -@@ -6446,7 +6452,7 @@ CXXFLAGS_CXX11= - if test "$COM" = MSC -a "$COM_IS_CLANG" != TRUE; then - CXXFLAGS_CXX11='-std:c++17 -Zc:__cplusplus' - elif test "$GCC" = "yes" -o "$COM_IS_CLANG" = TRUE; then -- my_flags='-std=c++2a -std=c++17 -std=c++1z' -+ my_flags='-std=gnu++17 -std=gnu++1z' - for flag in $my_flags; do - if test "$COM" = MSC; then - flag="-Xclang $flag" @@ -8097,7 +8103,7 @@ else if test "$COM" = "MSC"; then LIBJPEG_LIBS="${WORKDIR}/UnpackedTarball/libjpeg-turbo/.libs/libjpeg.lib" @@ -85,7 +74,7 @@ $NetBSD: patch-configure.ac,v 1.15 2020/02/05 14:10:35 ryoon Exp $ # C++ library doesn't come with fancy gpgmepp-config, check for headers the old-fashioned way - AC_CHECK_HEADER(gpgme++/gpgmepp_version.h, [ GPGMEPP_CFLAGS=-I/usr/include/gpgme++ ], -+ AC_CHECK_HEADER(gpgme++/gpgmepp_version.h, [ GPGMEPP_CFLAGS=-I/usr/pkg/include/gpgme++ ], ++ AC_CHECK_HEADER(gpgme++/gpgmepp_version.h, [ GPGMEPP_CFLAGS=-I@PREFIX@/include/gpgme++ ], [AC_MSG_ERROR([gpgmepp headers not found, install gpgmepp development 
package])], []) # progress_callback is the only func with plain C linkage # checking for it also filters out older, KDE-dependent libgpgmepp versions diff --git a/misc/libreoffice/patches/patch-download.lst b/misc/libreoffice/patches/patch-download.lst index 4ccfcefca2d..f07461191d0 100644 --- a/misc/libreoffice/patches/patch-download.lst +++ b/misc/libreoffice/patches/patch-download.lst @@ -1,15 +1,15 @@ -$NetBSD: patch-download.lst,v 1.15 2020/02/05 14:10:35 ryoon Exp $ +$NetBSD: patch-download.lst,v 1.16 2020/07/04 05:02:14 ryoon Exp $ ---- download.lst.orig 2020-01-22 23:14:16.000000000 +0000 +--- download.lst.orig 2020-06-24 20:35:34.000000000 +0000 +++ download.lst @@ -97,8 +97,8 @@ export GPGME_TARBALL := gpgme-1.9.0.tar. - export GRAPHITE_SHA256SUM := d47d387161db7f7ebade1920aa7cbdc797e79772597d8b55e80b58d1071bcc36 - export GRAPHITE_TARBALL := graphite2-minimal-1.3.13.tgz + export GRAPHITE_SHA256SUM := b8e892d8627c41888ff121e921455b9e2d26836978f2359173d19825da62b8fc + export GRAPHITE_TARBALL := graphite2-minimal-1.3.14.tgz export HARFBUZZ_SHA256SUM := 9cf7d117548265f95ca884e2f4c9fafaf4e17d45a67b11107147b79eed76c966 -export HARFBUZZ_TARBALL := harfbuzz-2.6.0.tar.xz -export HSQLDB_SHA256SUM := d30b13f4ba2e3b6a2d4f020c0dee0a9fb9fc6fbcc2d561f36b78da4bf3802370 -+export HARFBUZZ_TARBALL := harfbuzz-2.6.4.tar.xz -+export HSQLDB_SHA256SUM := 9413b8d96132d699687ef914ebb8c50440efc87b3f775d25856d7ec347c03c12 ++export HARFBUZZ_TARBALL := harfbuzz-2.6.8.tar.xz ++export HSQLDB_SHA256SUM := 6648a571a27f186e47094121f0095e1b809e918b3037c630c7f38ffad86e3035 export HSQLDB_TARBALL := 17410483b5b5f267aa18b7e00b65e6e0-hsqldb_1_8_0.zip export HUNSPELL_SHA256SUM := 57be4e03ae9dd62c3471f667a0d81a14513e314d4d92081292b90435944ff951 export HUNSPELL_TARBALL := hunspell-1.7.0.tar.gz diff --git a/misc/libreoffice/patches/patch-sc_source_core_opencl_formulagroupcl.cxx b/misc/libreoffice/patches/patch-sc_source_core_opencl_formulagroupcl.cxx deleted file mode 100644 index 
d0b81b7dd00..00000000000 --- a/misc/libreoffice/patches/patch-sc_source_core_opencl_formulagroupcl.cxx +++ /dev/null @@ -1,798 +0,0 @@ -$NetBSD: patch-sc_source_core_opencl_formulagroupcl.cxx,v 1.1 2020/04/20 13:00:48 joerg Exp $ - ---- sc/source/core/opencl/formulagroupcl.cxx.orig 2020-04-20 00:48:10.479759827 +0000 -+++ sc/source/core/opencl/formulagroupcl.cxx -@@ -1335,185 +1335,7 @@ public: - } - - /// Emit the definition for the auxiliary reduction kernel -- virtual void GenSlidingWindowFunction( std::stringstream& ss ) -- { -- if (!dynamic_cast<OpAverage*>(mpCodeGen.get())) -- { -- std::string name = Base::GetName(); -- ss << "__kernel void " << name; -- ss << "_reduction(__global double* A, " -- "__global double *result,int arrayLength,int windowSize){\n"; -- ss << " double tmp, current_result =" << -- mpCodeGen->GetBottom(); -- ss << ";\n"; -- ss << " int writePos = get_group_id(1);\n"; -- ss << " int lidx = get_local_id(0);\n"; -- ss << " __local double shm_buf[256];\n"; -- if (mpDVR->IsStartFixed()) -- ss << " int offset = 0;\n"; -- else // if (!mpDVR->IsStartFixed()) -- ss << " int offset = get_group_id(1);\n"; -- if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed()) -- ss << " int end = windowSize;\n"; -- else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed()) -- ss << " int end = offset + windowSize;\n"; -- else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed()) -- ss << " int end = windowSize + get_group_id(1);\n"; -- else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed()) -- ss << " int end = windowSize;\n"; -- ss << " end = min(end, arrayLength);\n"; -- -- ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; -- ss << " int loop = arrayLength/512 + 1;\n"; -- ss << " for (int l=0; l<loop; l++){\n"; -- ss << " tmp = " << mpCodeGen->GetBottom() << ";\n"; -- ss << " int loopOffset = l*512;\n"; -- ss << " if((loopOffset + lidx + offset + 256) < end) {\n"; -- ss << " tmp = legalize(" << mpCodeGen->Gen2( -- "A[loopOffset + lidx + offset]", "tmp") << ", tmp);\n"; -- ss << 
" tmp = legalize(" << mpCodeGen->Gen2( -- "A[loopOffset + lidx + offset + 256]", "tmp") << ", tmp);\n"; -- ss << " } else if ((loopOffset + lidx + offset) < end)\n"; -- ss << " tmp = legalize(" << mpCodeGen->Gen2( -- "A[loopOffset + lidx + offset]", "tmp") << ", tmp);\n"; -- ss << " shm_buf[lidx] = tmp;\n"; -- ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; -- ss << " for (int i = 128; i >0; i/=2) {\n"; -- ss << " if (lidx < i)\n"; -- ss << " shm_buf[lidx] = "; -- // Special case count -- if (dynamic_cast<OpCount*>(mpCodeGen.get())) -- ss << "shm_buf[lidx] + shm_buf[lidx + i];\n"; -- else -- ss << mpCodeGen->Gen2("shm_buf[lidx]", "shm_buf[lidx + i]") << ";\n"; -- ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; -- ss << " }\n"; -- ss << " if (lidx == 0)\n"; -- ss << " current_result ="; -- if (dynamic_cast<OpCount*>(mpCodeGen.get())) -- ss << "current_result + shm_buf[0]"; -- else -- ss << mpCodeGen->Gen2("current_result", "shm_buf[0]"); -- ss << ";\n"; -- ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; -- ss << " }\n"; -- ss << " if (lidx == 0)\n"; -- ss << " result[writePos] = current_result;\n"; -- ss << "}\n"; -- } -- else -- { -- std::string name = Base::GetName(); -- /*sum reduction*/ -- ss << "__kernel void " << name << "_sum"; -- ss << "_reduction(__global double* A, " -- "__global double *result,int arrayLength,int windowSize){\n"; -- ss << " double tmp, current_result =" << -- mpCodeGen->GetBottom(); -- ss << ";\n"; -- ss << " int writePos = get_group_id(1);\n"; -- ss << " int lidx = get_local_id(0);\n"; -- ss << " __local double shm_buf[256];\n"; -- if (mpDVR->IsStartFixed()) -- ss << " int offset = 0;\n"; -- else // if (!mpDVR->IsStartFixed()) -- ss << " int offset = get_group_id(1);\n"; -- if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed()) -- ss << " int end = windowSize;\n"; -- else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed()) -- ss << " int end = offset + windowSize;\n"; -- else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed()) -- ss << " int end = windowSize + 
get_group_id(1);\n"; -- else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed()) -- ss << " int end = windowSize;\n"; -- ss << " end = min(end, arrayLength);\n"; -- ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; -- ss << " int loop = arrayLength/512 + 1;\n"; -- ss << " for (int l=0; l<loop; l++){\n"; -- ss << " tmp = " << mpCodeGen->GetBottom() << ";\n"; -- ss << " int loopOffset = l*512;\n"; -- ss << " if((loopOffset + lidx + offset + 256) < end) {\n"; -- ss << " tmp = legalize("; -- ss << "(A[loopOffset + lidx + offset]+ tmp)"; -- ss << ", tmp);\n"; -- ss << " tmp = legalize((A[loopOffset + lidx + offset + 256]+ tmp)"; -- ss << ", tmp);\n"; -- ss << " } else if ((loopOffset + lidx + offset) < end)\n"; -- ss << " tmp = legalize((A[loopOffset + lidx + offset] + tmp)"; -- ss << ", tmp);\n"; -- ss << " shm_buf[lidx] = tmp;\n"; -- ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; -- ss << " for (int i = 128; i >0; i/=2) {\n"; -- ss << " if (lidx < i)\n"; -- ss << " shm_buf[lidx] = "; -- ss << "shm_buf[lidx] + shm_buf[lidx + i];\n"; -- ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; -- ss << " }\n"; -- ss << " if (lidx == 0)\n"; -- ss << " current_result ="; -- ss << "current_result + shm_buf[0]"; -- ss << ";\n"; -- ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; -- ss << " }\n"; -- ss << " if (lidx == 0)\n"; -- ss << " result[writePos] = current_result;\n"; -- ss << "}\n"; -- /*count reduction*/ -- ss << "__kernel void " << name << "_count"; -- ss << "_reduction(__global double* A, " -- "__global double *result,int arrayLength,int windowSize){\n"; -- ss << " double tmp, current_result =" << -- mpCodeGen->GetBottom(); -- ss << ";\n"; -- ss << " int writePos = get_group_id(1);\n"; -- ss << " int lidx = get_local_id(0);\n"; -- ss << " __local double shm_buf[256];\n"; -- if (mpDVR->IsStartFixed()) -- ss << " int offset = 0;\n"; -- else // if (!mpDVR->IsStartFixed()) -- ss << " int offset = get_group_id(1);\n"; -- if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed()) -- ss << " int end = 
windowSize;\n"; -- else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed()) -- ss << " int end = offset + windowSize;\n"; -- else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed()) -- ss << " int end = windowSize + get_group_id(1);\n"; -- else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed()) -- ss << " int end = windowSize;\n"; -- ss << " end = min(end, arrayLength);\n"; -- ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; -- ss << " int loop = arrayLength/512 + 1;\n"; -- ss << " for (int l=0; l<loop; l++){\n"; -- ss << " tmp = " << mpCodeGen->GetBottom() << ";\n"; -- ss << " int loopOffset = l*512;\n"; -- ss << " if((loopOffset + lidx + offset + 256) < end) {\n"; -- ss << " tmp = legalize((isnan(A[loopOffset + lidx + offset])?tmp:tmp+1.0)"; -- ss << ", tmp);\n"; -- ss << " tmp = legalize((isnan(A[loopOffset + lidx + offset+256])?tmp:tmp+1.0)"; -- ss << ", tmp);\n"; -- ss << " } else if ((loopOffset + lidx + offset) < end)\n"; -- ss << " tmp = legalize((isnan(A[loopOffset + lidx + offset])?tmp:tmp+1.0)"; -- ss << ", tmp);\n"; -- ss << " shm_buf[lidx] = tmp;\n"; -- ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; -- ss << " for (int i = 128; i >0; i/=2) {\n"; -- ss << " if (lidx < i)\n"; -- ss << " shm_buf[lidx] = "; -- ss << "shm_buf[lidx] + shm_buf[lidx + i];\n"; -- ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; -- ss << " }\n"; -- ss << " if (lidx == 0)\n"; -- ss << " current_result ="; -- ss << "current_result + shm_buf[0];"; -- ss << ";\n"; -- ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; -- ss << " }\n"; -- ss << " if (lidx == 0)\n"; -- ss << " result[writePos] = current_result;\n"; -- ss << "}\n"; -- } -- -- } -+ virtual void GenSlidingWindowFunction( std::stringstream& ss ); - - virtual std::string GenSlidingWindowDeclRef( bool ) const - { -@@ -1527,213 +1349,28 @@ public: - - /// Controls how the elements in the DoubleVectorRef are traversed - size_t GenReductionLoopHeader( -- std::stringstream& ss, int nResultSize, bool& needBody ) -+ std::stringstream& ss, int nResultSize, 
bool& needBody ); -+ -+ virtual size_t Marshal( cl_kernel k, int argno, int w, cl_program mpProgram ); -+ -+ ~ParallelReductionVectorRef() - { -- assert(mpDVR); -- size_t nCurWindowSize = mpDVR->GetRefRowSize(); -- std::string temp = Base::GetName() + "[gid0]"; -- ss << "tmp = "; -- // Special case count -- if (dynamic_cast<OpAverage*>(mpCodeGen.get())) -+ if (mpClmem2) - { -- ss << mpCodeGen->Gen2(temp, "tmp") << ";\n"; -- ss << "nCount = nCount-1;\n"; -- ss << "nCount = nCount +"; /*re-assign nCount from count reduction*/ -- ss << Base::GetName() << "[gid0+" << nResultSize << "]" << ";\n"; -+ cl_int err; -+ err = clReleaseMemObject(mpClmem2); -+ SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseMemObject failed: " << openclwrapper::errorString(err)); -+ mpClmem2 = nullptr; - } -- else if (dynamic_cast<OpCount*>(mpCodeGen.get())) -- ss << temp << "+ tmp"; -- else -- ss << mpCodeGen->Gen2(temp, "tmp"); -- ss << ";\n\t"; -- needBody = false; -- return nCurWindowSize; - } - -- virtual size_t Marshal( cl_kernel k, int argno, int w, cl_program mpProgram ) -- { -- assert(Base::mpClmem == nullptr); -- -- openclwrapper::KernelEnv kEnv; -- openclwrapper::setKernelEnv(&kEnv); -- cl_int err; -- size_t nInput = mpDVR->GetArrayLength(); -- size_t nCurWindowSize = mpDVR->GetRefRowSize(); -- // create clmem buffer -- if (mpDVR->GetArrays()[Base::mnIndex].mpNumericArray == nullptr) -- throw Unhandled(__FILE__, __LINE__); -- double* pHostBuffer = const_cast<double*>( -- mpDVR->GetArrays()[Base::mnIndex].mpNumericArray); -- size_t szHostBuffer = nInput * sizeof(double); -- Base::mpClmem = clCreateBuffer(kEnv.mpkContext, -- cl_mem_flags(CL_MEM_READ_ONLY) | CL_MEM_USE_HOST_PTR, -- szHostBuffer, -- pHostBuffer, &err); -- SAL_INFO("sc.opencl", "Created buffer " << Base::mpClmem << " size " << nInput << "*" << sizeof(double) << "=" << szHostBuffer << " using host buffer " << pHostBuffer); -- -- mpClmem2 = clCreateBuffer(kEnv.mpkContext, -- CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, 
-- sizeof(double) * w, nullptr, nullptr); -- if (CL_SUCCESS != err) -- throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__); -- SAL_INFO("sc.opencl", "Created buffer " << mpClmem2 << " size " << sizeof(double) << "*" << w << "=" << (sizeof(double)*w)); -+ size_t GetArrayLength() const { return mpDVR->GetArrayLength(); } - -- // reproduce the reduction function name -- std::string kernelName; -- if (!dynamic_cast<OpAverage*>(mpCodeGen.get())) -- kernelName = Base::GetName() + "_reduction"; -- else -- kernelName = Base::GetName() + "_sum_reduction"; -- cl_kernel redKernel = clCreateKernel(mpProgram, kernelName.c_str(), &err); -- if (err != CL_SUCCESS) -- throw OpenCLError("clCreateKernel", err, __FILE__, __LINE__); -- SAL_INFO("sc.opencl", "Created kernel " << redKernel << " with name " << kernelName << " in program " << mpProgram); -+ size_t GetWindowSize() const { return mpDVR->GetRefRowSize(); } - -- // set kernel arg of reduction kernel -- // TODO(Wei Wei): use unique name for kernel -- cl_mem buf = Base::GetCLBuffer(); -- SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 0 << ": cl_mem: " << buf); -- err = clSetKernelArg(redKernel, 0, sizeof(cl_mem), -- static_cast<void*>(&buf)); -- if (CL_SUCCESS != err) -- throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__); -+ bool GetStartFixed() const { return bIsStartFixed; } - -- SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 1 << ": cl_mem: " << mpClmem2); -- err = clSetKernelArg(redKernel, 1, sizeof(cl_mem), &mpClmem2); -- if (CL_SUCCESS != err) -- throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__); -- -- SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 2 << ": cl_int: " << nInput); -- err = clSetKernelArg(redKernel, 2, sizeof(cl_int), static_cast<void*>(&nInput)); -- if (CL_SUCCESS != err) -- throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__); -- -- SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 3 << ": cl_int: " << nCurWindowSize); -- err 
= clSetKernelArg(redKernel, 3, sizeof(cl_int), static_cast<void*>(&nCurWindowSize)); -- if (CL_SUCCESS != err) -- throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__); -- -- // set work group size and execute -- size_t global_work_size[] = { 256, static_cast<size_t>(w) }; -- size_t const local_work_size[] = { 256, 1 }; -- SAL_INFO("sc.opencl", "Enqueing kernel " << redKernel); -- err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, nullptr, -- global_work_size, local_work_size, 0, nullptr, nullptr); -- if (CL_SUCCESS != err) -- throw OpenCLError("clEnqueueNDRangeKernel", err, __FILE__, __LINE__); -- err = clFinish(kEnv.mpkCmdQueue); -- if (CL_SUCCESS != err) -- throw OpenCLError("clFinish", err, __FILE__, __LINE__); -- if (dynamic_cast<OpAverage*>(mpCodeGen.get())) -- { -- /*average need more reduction kernel for count computing*/ -- std::unique_ptr<double[]> pAllBuffer(new double[2 * w]); -- double* resbuf = static_cast<double*>(clEnqueueMapBuffer(kEnv.mpkCmdQueue, -- mpClmem2, -- CL_TRUE, CL_MAP_READ, 0, -- sizeof(double) * w, 0, nullptr, nullptr, -- &err)); -- if (err != CL_SUCCESS) -- throw OpenCLError("clEnqueueMapBuffer", err, __FILE__, __LINE__); -- -- for (int i = 0; i < w; i++) -- pAllBuffer[i] = resbuf[i]; -- err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpClmem2, resbuf, 0, nullptr, nullptr); -- if (err != CL_SUCCESS) -- throw OpenCLError("clEnqueueUnmapMemObject", err, __FILE__, __LINE__); -- -- kernelName = Base::GetName() + "_count_reduction"; -- redKernel = clCreateKernel(mpProgram, kernelName.c_str(), &err); -- if (err != CL_SUCCESS) -- throw OpenCLError("clCreateKernel", err, __FILE__, __LINE__); -- SAL_INFO("sc.opencl", "Created kernel " << redKernel << " with name " << kernelName << " in program " << mpProgram); -- -- // set kernel arg of reduction kernel -- buf = Base::GetCLBuffer(); -- SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 0 << ": cl_mem: " << buf); -- err = clSetKernelArg(redKernel, 0, sizeof(cl_mem), -- 
static_cast<void*>(&buf)); -- if (CL_SUCCESS != err) -- throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__); -- -- SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 1 << ": cl_mem: " << mpClmem2); -- err = clSetKernelArg(redKernel, 1, sizeof(cl_mem), &mpClmem2); -- if (CL_SUCCESS != err) -- throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__); -- -- SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 2 << ": cl_int: " << nInput); -- err = clSetKernelArg(redKernel, 2, sizeof(cl_int), static_cast<void*>(&nInput)); -- if (CL_SUCCESS != err) -- throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__); -- -- SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 3 << ": cl_int: " << nCurWindowSize); -- err = clSetKernelArg(redKernel, 3, sizeof(cl_int), static_cast<void*>(&nCurWindowSize)); -- if (CL_SUCCESS != err) -- throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__); -- -- // set work group size and execute -- size_t global_work_size1[] = { 256, static_cast<size_t>(w) }; -- size_t const local_work_size1[] = { 256, 1 }; -- SAL_INFO("sc.opencl", "Enqueing kernel " << redKernel); -- err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, nullptr, -- global_work_size1, local_work_size1, 0, nullptr, nullptr); -- if (CL_SUCCESS != err) -- throw OpenCLError("clEnqueueNDRangeKernel", err, __FILE__, __LINE__); -- err = clFinish(kEnv.mpkCmdQueue); -- if (CL_SUCCESS != err) -- throw OpenCLError("clFinish", err, __FILE__, __LINE__); -- resbuf = static_cast<double*>(clEnqueueMapBuffer(kEnv.mpkCmdQueue, -- mpClmem2, -- CL_TRUE, CL_MAP_READ, 0, -- sizeof(double) * w, 0, nullptr, nullptr, -- &err)); -- if (err != CL_SUCCESS) -- throw OpenCLError("clEnqueueMapBuffer", err, __FILE__, __LINE__); -- for (int i = 0; i < w; i++) -- pAllBuffer[i + w] = resbuf[i]; -- err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpClmem2, resbuf, 0, nullptr, nullptr); -- // FIXME: Is it intentional to not throw an OpenCLError even if the 
clEnqueueUnmapMemObject() fails? -- if (CL_SUCCESS != err) -- SAL_WARN("sc.opencl", "clEnqueueUnmapMemObject failed: " << openclwrapper::errorString(err)); -- if (mpClmem2) -- { -- err = clReleaseMemObject(mpClmem2); -- SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseMemObject failed: " << openclwrapper::errorString(err)); -- mpClmem2 = nullptr; -- } -- mpClmem2 = clCreateBuffer(kEnv.mpkContext, -- cl_mem_flags(CL_MEM_READ_WRITE) | CL_MEM_COPY_HOST_PTR, -- w * sizeof(double) * 2, pAllBuffer.get(), &err); -- if (CL_SUCCESS != err) -- throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__); -- SAL_INFO("sc.opencl", "Created buffer " << mpClmem2 << " size " << w << "*" << sizeof(double) << "=" << (w*sizeof(double)) << " copying host buffer " << pAllBuffer.get()); -- } -- // set kernel arg -- SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_mem: " << mpClmem2); -- err = clSetKernelArg(k, argno, sizeof(cl_mem), &mpClmem2); -- if (CL_SUCCESS != err) -- throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__); -- return 1; -- } -- -- ~ParallelReductionVectorRef() -- { -- if (mpClmem2) -- { -- cl_int err; -- err = clReleaseMemObject(mpClmem2); -- SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseMemObject failed: " << openclwrapper::errorString(err)); -- mpClmem2 = nullptr; -- } -- } -- -- size_t GetArrayLength() const { return mpDVR->GetArrayLength(); } -- -- size_t GetWindowSize() const { return mpDVR->GetRefRowSize(); } -- -- bool GetStartFixed() const { return bIsStartFixed; } -- -- bool GetEndFixed() const { return bIsEndFixed; } -+ bool GetEndFixed() const { return bIsEndFixed; } - - protected: - bool bIsStartFixed, bIsEndFixed; -@@ -4395,6 +4032,378 @@ bool FormulaGroupInterpreterOpenCL::inte - return aRes.pushResultToDocument(rDoc, rTopPos); - } - -+template<class Base> -+void ParallelReductionVectorRef<Base>::GenSlidingWindowFunction( std::stringstream& ss ) -+{ -+ if (!dynamic_cast<OpAverage*>(mpCodeGen.get())) -+ { -+ 
std::string name = Base::GetName(); -+ ss << "__kernel void " << name; -+ ss << "_reduction(__global double* A, " -+ "__global double *result,int arrayLength,int windowSize){\n"; -+ ss << " double tmp, current_result =" << -+ mpCodeGen->GetBottom(); -+ ss << ";\n"; -+ ss << " int writePos = get_group_id(1);\n"; -+ ss << " int lidx = get_local_id(0);\n"; -+ ss << " __local double shm_buf[256];\n"; -+ if (mpDVR->IsStartFixed()) -+ ss << " int offset = 0;\n"; -+ else // if (!mpDVR->IsStartFixed()) -+ ss << " int offset = get_group_id(1);\n"; -+ if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed()) -+ ss << " int end = windowSize;\n"; -+ else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed()) -+ ss << " int end = offset + windowSize;\n"; -+ else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed()) -+ ss << " int end = windowSize + get_group_id(1);\n"; -+ else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed()) -+ ss << " int end = windowSize;\n"; -+ ss << " end = min(end, arrayLength);\n"; -+ -+ ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; -+ ss << " int loop = arrayLength/512 + 1;\n"; -+ ss << " for (int l=0; l<loop; l++){\n"; -+ ss << " tmp = " << mpCodeGen->GetBottom() << ";\n"; -+ ss << " int loopOffset = l*512;\n"; -+ ss << " if((loopOffset + lidx + offset + 256) < end) {\n"; -+ ss << " tmp = legalize(" << mpCodeGen->Gen2( -+ "A[loopOffset + lidx + offset]", "tmp") << ", tmp);\n"; -+ ss << " tmp = legalize(" << mpCodeGen->Gen2( -+ "A[loopOffset + lidx + offset + 256]", "tmp") << ", tmp);\n"; -+ ss << " } else if ((loopOffset + lidx + offset) < end)\n"; -+ ss << " tmp = legalize(" << mpCodeGen->Gen2( -+ "A[loopOffset + lidx + offset]", "tmp") << ", tmp);\n"; -+ ss << " shm_buf[lidx] = tmp;\n"; -+ ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; -+ ss << " for (int i = 128; i >0; i/=2) {\n"; -+ ss << " if (lidx < i)\n"; -+ ss << " shm_buf[lidx] = "; -+ // Special case count -+ if (dynamic_cast<OpCount*>(mpCodeGen.get())) -+ ss << "shm_buf[lidx] + shm_buf[lidx + i];\n"; -+ else -+ 
ss << mpCodeGen->Gen2("shm_buf[lidx]", "shm_buf[lidx + i]") << ";\n"; -+ ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; -+ ss << " }\n"; -+ ss << " if (lidx == 0)\n"; -+ ss << " current_result ="; -+ if (dynamic_cast<OpCount*>(mpCodeGen.get())) -+ ss << "current_result + shm_buf[0]"; -+ else -+ ss << mpCodeGen->Gen2("current_result", "shm_buf[0]"); -+ ss << ";\n"; -+ ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; -+ ss << " }\n"; -+ ss << " if (lidx == 0)\n"; -+ ss << " result[writePos] = current_result;\n"; -+ ss << "}\n"; -+ } -+ else -+ { -+ std::string name = Base::GetName(); -+ /*sum reduction*/ -+ ss << "__kernel void " << name << "_sum"; -+ ss << "_reduction(__global double* A, " -+ "__global double *result,int arrayLength,int windowSize){\n"; -+ ss << " double tmp, current_result =" << -+ mpCodeGen->GetBottom(); -+ ss << ";\n"; -+ ss << " int writePos = get_group_id(1);\n"; -+ ss << " int lidx = get_local_id(0);\n"; -+ ss << " __local double shm_buf[256];\n"; -+ if (mpDVR->IsStartFixed()) -+ ss << " int offset = 0;\n"; -+ else // if (!mpDVR->IsStartFixed()) -+ ss << " int offset = get_group_id(1);\n"; -+ if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed()) -+ ss << " int end = windowSize;\n"; -+ else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed()) -+ ss << " int end = offset + windowSize;\n"; -+ else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed()) -+ ss << " int end = windowSize + get_group_id(1);\n"; -+ else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed()) -+ ss << " int end = windowSize;\n"; -+ ss << " end = min(end, arrayLength);\n"; -+ ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; -+ ss << " int loop = arrayLength/512 + 1;\n"; -+ ss << " for (int l=0; l<loop; l++){\n"; -+ ss << " tmp = " << mpCodeGen->GetBottom() << ";\n"; -+ ss << " int loopOffset = l*512;\n"; -+ ss << " if((loopOffset + lidx + offset + 256) < end) {\n"; -+ ss << " tmp = legalize("; -+ ss << "(A[loopOffset + lidx + offset]+ tmp)"; -+ ss << ", tmp);\n"; -+ ss << " tmp = legalize((A[loopOffset + 
lidx + offset + 256]+ tmp)"; -+ ss << ", tmp);\n"; -+ ss << " } else if ((loopOffset + lidx + offset) < end)\n"; -+ ss << " tmp = legalize((A[loopOffset + lidx + offset] + tmp)"; -+ ss << ", tmp);\n"; -+ ss << " shm_buf[lidx] = tmp;\n"; -+ ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; -+ ss << " for (int i = 128; i >0; i/=2) {\n"; -+ ss << " if (lidx < i)\n"; -+ ss << " shm_buf[lidx] = "; -+ ss << "shm_buf[lidx] + shm_buf[lidx + i];\n"; -+ ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; -+ ss << " }\n"; -+ ss << " if (lidx == 0)\n"; -+ ss << " current_result ="; -+ ss << "current_result + shm_buf[0]"; -+ ss << ";\n"; -+ ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; -+ ss << " }\n"; -+ ss << " if (lidx == 0)\n"; -+ ss << " result[writePos] = current_result;\n"; -+ ss << "}\n"; -+ /*count reduction*/ -+ ss << "__kernel void " << name << "_count"; -+ ss << "_reduction(__global double* A, " -+ "__global double *result,int arrayLength,int windowSize){\n"; -+ ss << " double tmp, current_result =" << -+ mpCodeGen->GetBottom(); -+ ss << ";\n"; -+ ss << " int writePos = get_group_id(1);\n"; -+ ss << " int lidx = get_local_id(0);\n"; -+ ss << " __local double shm_buf[256];\n"; -+ if (mpDVR->IsStartFixed()) -+ ss << " int offset = 0;\n"; -+ else // if (!mpDVR->IsStartFixed()) -+ ss << " int offset = get_group_id(1);\n"; -+ if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed()) -+ ss << " int end = windowSize;\n"; -+ else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed()) -+ ss << " int end = offset + windowSize;\n"; -+ else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed()) -+ ss << " int end = windowSize + get_group_id(1);\n"; -+ else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed()) -+ ss << " int end = windowSize;\n"; -+ ss << " end = min(end, arrayLength);\n"; -+ ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; -+ ss << " int loop = arrayLength/512 + 1;\n"; -+ ss << " for (int l=0; l<loop; l++){\n"; -+ ss << " tmp = " << mpCodeGen->GetBottom() << ";\n"; -+ ss << " int loopOffset = l*512;\n"; -+ 
ss << " if((loopOffset + lidx + offset + 256) < end) {\n"; -+ ss << " tmp = legalize((isnan(A[loopOffset + lidx + offset])?tmp:tmp+1.0)"; -+ ss << ", tmp);\n"; -+ ss << " tmp = legalize((isnan(A[loopOffset + lidx + offset+256])?tmp:tmp+1.0)"; -+ ss << ", tmp);\n"; -+ ss << " } else if ((loopOffset + lidx + offset) < end)\n"; -+ ss << " tmp = legalize((isnan(A[loopOffset + lidx + offset])?tmp:tmp+1.0)"; -+ ss << ", tmp);\n"; -+ ss << " shm_buf[lidx] = tmp;\n"; -+ ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; -+ ss << " for (int i = 128; i >0; i/=2) {\n"; -+ ss << " if (lidx < i)\n"; -+ ss << " shm_buf[lidx] = "; -+ ss << "shm_buf[lidx] + shm_buf[lidx + i];\n"; -+ ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; -+ ss << " }\n"; -+ ss << " if (lidx == 0)\n"; -+ ss << " current_result ="; -+ ss << "current_result + shm_buf[0];"; -+ ss << ";\n"; -+ ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; -+ ss << " }\n"; -+ ss << " if (lidx == 0)\n"; -+ ss << " result[writePos] = current_result;\n"; -+ ss << "}\n"; -+ } -+} -+ -+template<class Base> -+size_t ParallelReductionVectorRef<Base>::GenReductionLoopHeader( -+ std::stringstream& ss, int nResultSize, bool& needBody ) -+ { -+ assert(mpDVR); -+ size_t nCurWindowSize = mpDVR->GetRefRowSize(); -+ std::string temp = Base::GetName() + "[gid0]"; -+ ss << "tmp = "; -+ // Special case count -+ if (dynamic_cast<OpAverage*>(mpCodeGen.get())) -+ { -+ ss << mpCodeGen->Gen2(temp, "tmp") << ";\n"; -+ ss << "nCount = nCount-1;\n"; -+ ss << "nCount = nCount +"; /*re-assign nCount from count reduction*/ -+ ss << Base::GetName() << "[gid0+" << nResultSize << "]" << ";\n"; -+ } -+ else if (dynamic_cast<OpCount*>(mpCodeGen.get())) -+ ss << temp << "+ tmp"; -+ else -+ ss << mpCodeGen->Gen2(temp, "tmp"); -+ ss << ";\n\t"; -+ needBody = false; -+ return nCurWindowSize; -+ } -+ -+template<class Base> -+size_t ParallelReductionVectorRef<Base>::Marshal( cl_kernel k, int argno, int w, cl_program mpProgram ) -+ { -+ assert(Base::mpClmem == nullptr); -+ -+ 
openclwrapper::KernelEnv kEnv; -+ openclwrapper::setKernelEnv(&kEnv); -+ cl_int err; -+ size_t nInput = mpDVR->GetArrayLength(); -+ size_t nCurWindowSize = mpDVR->GetRefRowSize(); -+ // create clmem buffer -+ if (mpDVR->GetArrays()[Base::mnIndex].mpNumericArray == nullptr) -+ throw Unhandled(__FILE__, __LINE__); -+ double* pHostBuffer = const_cast<double*>( -+ mpDVR->GetArrays()[Base::mnIndex].mpNumericArray); -+ size_t szHostBuffer = nInput * sizeof(double); -+ Base::mpClmem = clCreateBuffer(kEnv.mpkContext, -+ cl_mem_flags(CL_MEM_READ_ONLY) | CL_MEM_USE_HOST_PTR, -+ szHostBuffer, -+ pHostBuffer, &err); -+ SAL_INFO("sc.opencl", "Created buffer " << Base::mpClmem << " size " << nInput << "*" << sizeof(double) << "=" << szHostBuffer << " using host buffer " << pHostBuffer); -+ -+ mpClmem2 = clCreateBuffer(kEnv.mpkContext, -+ CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, -+ sizeof(double) * w, nullptr, nullptr); -+ if (CL_SUCCESS != err) -+ throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__); -+ SAL_INFO("sc.opencl", "Created buffer " << mpClmem2 << " size " << sizeof(double) << "*" << w << "=" << (sizeof(double)*w)); -+ -+ // reproduce the reduction function name -+ std::string kernelName; -+ if (!dynamic_cast<OpAverage*>(mpCodeGen.get())) -+ kernelName = Base::GetName() + "_reduction"; -+ else -+ kernelName = Base::GetName() + "_sum_reduction"; -+ cl_kernel redKernel = clCreateKernel(mpProgram, kernelName.c_str(), &err); -+ if (err != CL_SUCCESS) -+ throw OpenCLError("clCreateKernel", err, __FILE__, __LINE__); -+ SAL_INFO("sc.opencl", "Created kernel " << redKernel << " with name " << kernelName << " in program " << mpProgram); -+ -+ // set kernel arg of reduction kernel -+ // TODO(Wei Wei): use unique name for kernel -+ cl_mem buf = Base::GetCLBuffer(); -+ SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 0 << ": cl_mem: " << buf); -+ err = clSetKernelArg(redKernel, 0, sizeof(cl_mem), -+ static_cast<void*>(&buf)); -+ if (CL_SUCCESS != err) -+ throw 
OpenCLError("clSetKernelArg", err, __FILE__, __LINE__); -+ -+ SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 1 << ": cl_mem: " << mpClmem2); -+ err = clSetKernelArg(redKernel, 1, sizeof(cl_mem), &mpClmem2); -+ if (CL_SUCCESS != err) -+ throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__); -+ -+ SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 2 << ": cl_int: " << nInput); -+ err = clSetKernelArg(redKernel, 2, sizeof(cl_int), static_cast<void*>(&nInput)); -+ if (CL_SUCCESS != err) -+ throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__); -+ -+ SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 3 << ": cl_int: " << nCurWindowSize); -+ err = clSetKernelArg(redKernel, 3, sizeof(cl_int), static_cast<void*>(&nCurWindowSize)); -+ if (CL_SUCCESS != err) -+ throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__); -+ -+ // set work group size and execute -+ size_t global_work_size[] = { 256, static_cast<size_t>(w) }; -+ size_t const local_work_size[] = { 256, 1 }; -+ SAL_INFO("sc.opencl", "Enqueing kernel " << redKernel); -+ err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, nullptr, -+ global_work_size, local_work_size, 0, nullptr, nullptr); -+ if (CL_SUCCESS != err) -+ throw OpenCLError("clEnqueueNDRangeKernel", err, __FILE__, __LINE__); -+ err = clFinish(kEnv.mpkCmdQueue); -+ if (CL_SUCCESS != err) -+ throw OpenCLError("clFinish", err, __FILE__, __LINE__); -+ if (dynamic_cast<OpAverage*>(mpCodeGen.get())) -+ { -+ /*average need more reduction kernel for count computing*/ -+ std::unique_ptr<double[]> pAllBuffer(new double[2 * w]); -+ double* resbuf = static_cast<double*>(clEnqueueMapBuffer(kEnv.mpkCmdQueue, -+ mpClmem2, -+ CL_TRUE, CL_MAP_READ, 0, -+ sizeof(double) * w, 0, nullptr, nullptr, -+ &err)); -+ if (err != CL_SUCCESS) -+ throw OpenCLError("clEnqueueMapBuffer", err, __FILE__, __LINE__); -+ -+ for (int i = 0; i < w; i++) -+ pAllBuffer[i] = resbuf[i]; -+ err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, 
mpClmem2, resbuf, 0, nullptr, nullptr); -+ if (err != CL_SUCCESS) -+ throw OpenCLError("clEnqueueUnmapMemObject", err, __FILE__, __LINE__); -+ -+ kernelName = Base::GetName() + "_count_reduction"; -+ redKernel = clCreateKernel(mpProgram, kernelName.c_str(), &err); -+ if (err != CL_SUCCESS) -+ throw OpenCLError("clCreateKernel", err, __FILE__, __LINE__); -+ SAL_INFO("sc.opencl", "Created kernel " << redKernel << " with name " << kernelName << " in program " << mpProgram); -+ -+ // set kernel arg of reduction kernel -+ buf = Base::GetCLBuffer(); -+ SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 0 << ": cl_mem: " << buf); -+ err = clSetKernelArg(redKernel, 0, sizeof(cl_mem), -+ static_cast<void*>(&buf)); -+ if (CL_SUCCESS != err) -+ throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__); -+ -+ SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 1 << ": cl_mem: " << mpClmem2); -+ err = clSetKernelArg(redKernel, 1, sizeof(cl_mem), &mpClmem2); -+ if (CL_SUCCESS != err) -+ throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__); -+ -+ SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 2 << ": cl_int: " << nInput); -+ err = clSetKernelArg(redKernel, 2, sizeof(cl_int), static_cast<void*>(&nInput)); -+ if (CL_SUCCESS != err) -+ throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__); -+ -+ SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 3 << ": cl_int: " << nCurWindowSize); -+ err = clSetKernelArg(redKernel, 3, sizeof(cl_int), static_cast<void*>(&nCurWindowSize)); -+ if (CL_SUCCESS != err) -+ throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__); -+ -+ // set work group size and execute -+ size_t global_work_size1[] = { 256, static_cast<size_t>(w) }; -+ size_t const local_work_size1[] = { 256, 1 }; -+ SAL_INFO("sc.opencl", "Enqueing kernel " << redKernel); -+ err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, nullptr, -+ global_work_size1, local_work_size1, 0, nullptr, nullptr); -+ if (CL_SUCCESS != err) 
-+ throw OpenCLError("clEnqueueNDRangeKernel", err, __FILE__, __LINE__); -+ err = clFinish(kEnv.mpkCmdQueue); -+ if (CL_SUCCESS != err) -+ throw OpenCLError("clFinish", err, __FILE__, __LINE__); -+ resbuf = static_cast<double*>(clEnqueueMapBuffer(kEnv.mpkCmdQueue, -+ mpClmem2, -+ CL_TRUE, CL_MAP_READ, 0, -+ sizeof(double) * w, 0, nullptr, nullptr, -+ &err)); -+ if (err != CL_SUCCESS) -+ throw OpenCLError("clEnqueueMapBuffer", err, __FILE__, __LINE__); -+ for (int i = 0; i < w; i++) -+ pAllBuffer[i + w] = resbuf[i]; -+ err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpClmem2, resbuf, 0, nullptr, nullptr); -+ // FIXME: Is it intentional to not throw an OpenCLError even if the clEnqueueUnmapMemObject() fails? -+ if (CL_SUCCESS != err) -+ SAL_WARN("sc.opencl", "clEnqueueUnmapMemObject failed: " << openclwrapper::errorString(err)); -+ if (mpClmem2) -+ { -+ err = clReleaseMemObject(mpClmem2); -+ SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseMemObject failed: " << openclwrapper::errorString(err)); -+ mpClmem2 = nullptr; -+ } -+ mpClmem2 = clCreateBuffer(kEnv.mpkContext, -+ cl_mem_flags(CL_MEM_READ_WRITE) | CL_MEM_COPY_HOST_PTR, -+ w * sizeof(double) * 2, pAllBuffer.get(), &err); -+ if (CL_SUCCESS != err) -+ throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__); -+ SAL_INFO("sc.opencl", "Created buffer " << mpClmem2 << " size " << w << "*" << sizeof(double) << "=" << (w*sizeof(double)) << " copying host buffer " << pAllBuffer.get()); -+ } -+ // set kernel arg -+ SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_mem: " << mpClmem2); -+ err = clSetKernelArg(k, argno, sizeof(cl_mem), &mpClmem2); -+ if (CL_SUCCESS != err) -+ throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__); -+ return 1; -+ } -+ - }} // namespace sc::opencl - - /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |