{"id":11787,"date":"2014-04-03T20:58:26","date_gmt":"2014-04-03T17:58:26","guid":{"rendered":"http:\/\/hgpu.org\/?p=11787"},"modified":"2014-04-03T20:58:26","modified_gmt":"2014-04-03T17:58:26","slug":"experiments-with-massively-parallel-matrix-multiplication","status":"publish","type":"post","link":"https:\/\/hgpu.org\/?p=11787","title":{"rendered":"Experiments with Massively Parallel Matrix Multiplication"},"content":{"rendered":"<p>This paper presents initial experiments in implementing two notable matrix multiplication algorithms \u2013 the DNS algorithm and Cannon\u2019s algorithm \u2013 using NVIDIA\u2019s general-purpose graphics processing units (GPGPUs) and CUDA development platform. We demonstrate that these implementations are comparable with traditional methods in terms of computational expense and may scale better than traditional techniques.<\/p>\n","protected":false},"excerpt":{"rendered":"<p>This paper presents initial experiments in implementing two notable matrix multiplication algorithms \u2013 the DNS algorithm and Cannon\u2019s algorithm \u2013 using NVIDIA\u2019s general-purpose graphics processing units (GPGPUs) and CUDA development platform. We demonstrate that these implementations are comparable with traditional methods in terms of computational expense and may scale better than traditional techniques.<\/p>\n","protected":false},"author":351,"featured_media":0,"comment_status":"open","ping_status":"closed","sticky":false,"template":"","format":"standard","meta":{"_jetpack_newsletter_access":"","_jetpack_dont_email_post_to_subs":false,"_jetpack_newsletter_tier_id":0,"_jetpack_memberships_contains_paywalled_content":false,"_jetpack_feature_clip_id":0,"_jetpack_memberships_contains_paid_content":false,"footnotes":"","jetpack_publicize_message":"","jetpack_publicize_feature_enabled":true,"jetpack_social_post_already_shared":true,"jetpack_social_options":{"image_generator_settings":{"template":"highway","default_image_id":0,"font":"","enabled":false},"version":2},"jetpack_post_was_ever_published":false},"categories":[36,89,157,3],"tags":[1787,14,37,1796,20,1585],"class_list":["post-11787","post","type-post","status-publish","format-standard","hentry","category-algorithms","category-nvidia-cuda","category-mathematics","category-paper","tag-algorithms","tag-cuda","tag-linear-algebra","tag-mathematics","tag-nvidia","tag-nvidia-tesla-c2070"],"views":2237,"jetpack_publicize_connections":[],"jetpack_featured_media_url":"","jetpack_sharing_enabled":true,"_links":{"self":[{"href":"https:\/\/hgpu.org\/index.php?rest_route=\/wp\/v2\/posts\/11787","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/hgpu.org\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/hgpu.org\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/hgpu.org\/index.php?rest_route=\/wp\/v2\/users\/351"}],"replies":[{"embeddable":true,"href":"https:\/\/hgpu.org\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=11787"}],"version-history":[{"count":0,"href":"https:\/\/hgpu.org\/index.php?rest_route=\/wp\/v2\/posts\/11787\/revisions"}],"wp:attachment":[{"href":"https:\/\/hgpu.org\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=11787"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/hgpu.org\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=11787"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/hgpu.org\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=11787"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}