{"id":18642,"date":"2018-12-09T15:01:03","date_gmt":"2018-12-09T13:01:03","guid":{"rendered":"https:\/\/hgpu.org\/?p=18642"},"modified":"2018-12-09T15:01:03","modified_gmt":"2018-12-09T13:01:03","slug":"optimization-of-a-discontinuous-finite-element-solver-with-opencl-and-starpu","status":"publish","type":"post","link":"https:\/\/hgpu.org\/?p=18642","title":{"rendered":"Optimization of a discontinuous finite element solver with OpenCL and StarPU"},"content":{"rendered":"<p>schnaps is a finite element solver designed to simulate various physical phenomena. It is designed to run on hybrid computers made of several CPUs and GPUs. In order to address the hybrid architectures we rely on the StarPU runtime. StarPU allows to optimize in an incremental way a sequential algorithm in order to migrate to multicore parallelism and then to hybrid computing with OpenCL accelerators. StarPU proposes several task scheduling strategies in order to efficiently exploit the available computing power. We present the design of schnaps and the performance gain that we have obtained in electromagnetic simulations thanks to OpenCL codelets.<\/p>\n","protected":false},"excerpt":{"rendered":"<p>schnaps is a finite element solver designed to simulate various physical phenomena. It is designed to run on hybrid computers made of several CPUs and GPUs. In order to address the hybrid architectures we rely on the StarPU runtime. StarPU allows to optimize in an incremental way a sequential algorithm in order to migrate to [&hellip;]<\/p>\n","protected":false},"author":351,"featured_media":0,"comment_status":"open","ping_status":"closed","sticky":false,"template":"","format":"standard","meta":{"_jetpack_newsletter_access":"","_jetpack_dont_email_post_to_subs":false,"_jetpack_newsletter_tier_id":0,"_jetpack_memberships_contains_paywalled_content":false,"_jetpack_feature_clip_id":0,"_jetpack_memberships_contains_paid_content":false,"footnotes":"","jetpack_publicize_message":"","jetpack_publicize_feature_enabled":true,"jetpack_social_post_already_shared":true,"jetpack_social_options":{"image_generator_settings":{"template":"highway","default_image_id":0,"font":"","enabled":false},"version":2},"jetpack_post_was_ever_published":false},"categories":[36,11,90,3],"tags":[1787,1782,555,20,1972,2014,1793,176,854],"class_list":["post-18642","post","type-post","status-publish","format-standard","hentry","category-algorithms","category-computer-science","category-opencl","category-paper","tag-algorithms","tag-computer-science","tag-hybrid-computing","tag-nvidia","tag-nvidia-geforce-gtx-1050-ti","tag-nvidia-quadro-p-6000","tag-opencl","tag-package","tag-task-scheduling"],"views":2486,"jetpack_publicize_connections":[],"jetpack_featured_media_url":"","jetpack_sharing_enabled":true,"_links":{"self":[{"href":"https:\/\/hgpu.org\/index.php?rest_route=\/wp\/v2\/posts\/18642","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/hgpu.org\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/hgpu.org\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/hgpu.org\/index.php?rest_route=\/wp\/v2\/users\/351"}],"replies":[{"embeddable":true,"href":"https:\/\/hgpu.org\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=18642"}],"version-history":[{"count":0,"href":"https:\/\/hgpu.org\/index.php?rest_route=\/wp\/v2\/posts\/18642\/revisions"}],"wp:attachment":[{"href":"https:\/\/hgpu.org\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=18642"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/hgpu.org\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=18642"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/hgpu.org\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=18642"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}