Skip to content

Commit

Permalink
FluidX3D v3.0 upgrade
Browse files Browse the repository at this point in the history
  • Loading branch information
ProjectPhysX committed Nov 16, 2024
1 parent 497331f commit 81d8047
Show file tree
Hide file tree
Showing 7 changed files with 118 additions and 53 deletions.
8 changes: 4 additions & 4 deletions DOCUMENTATION.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
sudo apt update && sudo apt upgrade -y
sudo apt install -y g++ git make ocl-icd-libopencl1 ocl-icd-opencl-dev
mkdir -p ~/amdgpu
wget -P ~/amdgpu https://repo.radeon.com/amdgpu-install/6.1.3/ubuntu/jammy/amdgpu-install_6.1.60103-1_all.deb
wget -P ~/amdgpu https://repo.radeon.com/amdgpu-install/6.2.3/ubuntu/noble/amdgpu-install_6.2.60203-1_all.deb
sudo apt install -y ~/amdgpu/amdgpu-install*.deb
sudo amdgpu-install -y --usecase=graphics,rocm,opencl --opencl=rocr
sudo usermod -a -G render,video $(whoami)
Expand Down Expand Up @@ -60,12 +60,12 @@
- Option 1: Download and install the [oneAPI DPC++ Compiler](https://github.com/intel/llvm/releases?q=oneAPI+DPC%2B%2B+Compiler) and [oneTBB](https://github.com/oneapi-src/oneTBB/releases) with:
```bash
export OCLV="2024.18.6.0.02_rel"
export TBBV="2021.13.0"
export OCLV="2024.18.10.0.08_rel"
export TBBV="2022.0.0"
sudo apt update && sudo apt upgrade -y
sudo apt install -y g++ git make ocl-icd-libopencl1 ocl-icd-opencl-dev
sudo mkdir -p ~/cpurt /opt/intel/oclcpuexp_${OCLV} /etc/OpenCL/vendors /etc/ld.so.conf.d
sudo wget -P ~/cpurt https://github.com/intel/llvm/releases/download/2024-WW25/oclcpuexp-${OCLV}.tar.gz
sudo wget -P ~/cpurt https://github.com/intel/llvm/releases/download/2024-WW43/oclcpuexp-${OCLV}.tar.gz
sudo wget -P ~/cpurt https://github.com/oneapi-src/oneTBB/releases/download/v${TBBV}/oneapi-tbb-${TBBV}-lin.tgz
sudo tar -zxvf ~/cpurt/oclcpuexp-${OCLV}.tar.gz -C /opt/intel/oclcpuexp_${OCLV}
sudo tar -zxvf ~/cpurt/oneapi-tbb-${TBBV}-lin.tgz -C /opt/intel
Expand Down
23 changes: 21 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
# FluidX3D

The fastest and most memory efficient lattice Boltzmann CFD software, running on all GPUs via [OpenCL](https://github.com/ProjectPhysX/OpenCL-Wrapper "OpenCL-Wrapper"). Free for non-commercial use.
The fastest and most memory efficient lattice Boltzmann CFD software, running on all GPUs and CPUs via [OpenCL](https://github.com/ProjectPhysX/OpenCL-Wrapper "OpenCL-Wrapper"). Free for non-commercial use.

<a href="https://youtu.be/-MkRBeQkLk8"><img src="https://img.youtube.com/vi/o3TPN142HxM/maxresdefault.jpg" width="50%"></img></a><a href="https://youtu.be/oC6U1M0Fsug"><img src="https://img.youtube.com/vi/oC6U1M0Fsug/maxresdefault.jpg" width="50%"></img></a><br>
<a href="https://youtu.be/XOfXHgP4jnQ"><img src="https://img.youtube.com/vi/XOfXHgP4jnQ/maxresdefault.jpg" width="50%"></img></a><a href="https://youtu.be/clAqgNtySow"><img src="https://img.youtube.com/vi/clAqgNtySow/maxresdefault.jpg" width="50%"></img></a>
<a href="https://youtu.be/XOfXHgP4jnQ"><img src="https://img.youtube.com/vi/XOfXHgP4jnQ/maxresdefault.jpg" width="50%"></img></a><a href="https://youtu.be/K5eKxzklXDA"><img src="https://img.youtube.com/vi/K5eKxzklXDA/maxresdefault.jpg" width="50%"></img></a>
(click on images to show videos on YouTube)

<details><summary>Update History</summary>
Expand Down Expand Up @@ -193,6 +193,13 @@ The fastest and most memory efficient lattice Boltzmann CFD software, running on
- fixed maximum buffer allocation size limit for AMD GPUs and in Intel CPU Runtime for OpenCL
- fixed wrong `Re<Re_max` info printout for 2D simulations
- minor fix in `bandwidth_bytes_per_cell_device()`
- [v3.0](https://github.com/ProjectPhysX/FluidX3D/releases/tag/v3.0) (16.11.2024) [changes](https://github.com/ProjectPhysX/FluidX3D/compare/v2.19...v3.0) (larger CPU/iGPU simulations)
- reduced memory footprint on CPUs and iGPUs from 72 to 55 Bytes/cell (fused OpenCL host+device buffers for `rho`/`u`/`flags`), allowing 31% higher resolution in the same RAM capacity
- faster atomic floating-point addition for the `PARTICLES` extension, both hardware-supported and fallback-emulated
- hardened `calculate_f_eq()` against bad user input for `D2Q9`
- fixed velocity voxelization for overlapping geometry with different velocity
- fixed Remaining Time printout during paused simulation
- fixed CPU/GPU memory printout for CPU/iGPU simulations

</details>

Expand Down Expand Up @@ -759,6 +766,8 @@ section Orange Pi 5 Mali-G610 MP4
232 :active, 0, 232
section Samsung Mali-G72 MP18 (S9+)
230 :active, 0, 230
section 2x EPYC 9754
5179 :crit, 0, 5179
section 2x EPYC 9654
1814 :crit, 0, 1814
section 2x EPYC 7352
Expand All @@ -767,6 +776,12 @@ section 2x EPYC 7313
498 :crit, 0, 498
section 2x EPYC 7302
784 :crit, 0, 784
section 2x 6980P
7875 :done, 0, 7875
section 2x 6979P
8135 :done, 0, 8135
section 2x Platinum 8592+
3135 :done, 0, 3135
section 2x CPU Max 9480
2037 :done, 0, 2037
section 2x Platinum 8480+
Expand Down Expand Up @@ -993,10 +1008,14 @@ Colors: 🔴 AMD, 🔵 Intel, 🟢 Nvidia, ⚪ Apple, 🟡 ARM, 🟤 Glenfly
| 🟡&nbsp;Mali-G610&nbsp;MP4 (Orange&nbsp;Pi&nbsp;5) | 0.06 | 16 | 34 | 130 (58%) | 232 (52%) | 93 (21%) |
| 🟡&nbsp;Mali-G72&nbsp;MP18 (Samsung&nbsp;S9+) | 0.24 | 4 | 29 | 110 (59%) | 230 (62%) | 21 ( 6%) |
| | | | | | | |
| 🔴&nbsp;2x&nbsp;EPYC&nbsp;9754 | 50.79 | 3072 | 922 | 3276 (54%) | 5077 (42%) | 5179 (43%) |
| 🔴&nbsp;2x&nbsp;EPYC&nbsp;9654 | 43.62 | 1536 | 922 | 1381 (23%) | 1814 (15%) | 1801 (15%) |
| 🔴&nbsp;2x&nbsp;EPYC&nbsp;7352 | 3.53 | 512 | 410 | 739 (28%) | 106 ( 2%) | 412 ( 8%) |
| 🔴&nbsp;2x&nbsp;EPYC&nbsp;7313 | 3.07 | 128 | 410 | 498 (19%) | 367 ( 7%) | 418 ( 8%) |
| 🔴&nbsp;2x&nbsp;EPYC&nbsp;7302 | 3.07 | 128 | 410 | 784 (29%) | 336 ( 6%) | 411 ( 8%) |
| 🔵&nbsp;2x&nbsp;Xeon&nbsp;6980P | 98.30 | 6144 | 1690 | 7875 (71%) | 5112 (23%) | 5610 (26%) |
| 🔵&nbsp;2x&nbsp;Xeon&nbsp;6979P | 92.16 | 3072 | 1690 | 8135 (74%) | 4175 (19%) | 4622 (21%) |
| 🔵&nbsp;2x&nbsp;Xeon&nbsp;Platinum&nbsp;8592+ | 31.13 | 1024 | 717 | 3135 (67%) | 2359 (25%) | 2466 (26%) |
| 🔵&nbsp;2x&nbsp;Xeon&nbsp;CPU&nbsp;Max&nbsp;9480 | 27.24 | 256 | 614 | 2037 (51%) | 1520 (19%) | 1464 (18%) |
| 🔵&nbsp;2x&nbsp;Xeon&nbsp;Platinum&nbsp;8480+ | 28.67 | 512 | 614 | 2162 (54%) | 1845 (23%) | 1884 (24%) |
| 🔵&nbsp;2x&nbsp;Xeon&nbsp;Platinum&nbsp;8380 | 23.55 | 2048 | 410 | 1410 (53%) | 1159 (22%) | 1298 (24%) |
Expand Down
15 changes: 12 additions & 3 deletions src/info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ void Info::print_logo() const {
print("| "); print("\\ \\ / /", c); print(" |\n");
print("| "); print("\\ ' /", c); print(" |\n");
print("| "); print("\\ /", c); print(" |\n");
print("| "); print("\\ /", c); print(" FluidX3D Version 2.19 |\n");
print("| "); print("\\ /", c); print(" FluidX3D Version 3.0 |\n");
print("| "); print( "'", c); print(" Copyright (c) Dr. Moritz Lehmann |\n");
print("|-----------------------------------------------------------------------------|\n");
}
Expand All @@ -61,8 +61,17 @@ void Info::print_initialize(LBM* lbm) {
#else // FP32
collision += " (FP32/FP32)";
#endif // FP32
cpu_mem_required = (uint)(lbm->get_N()*(ulong)bytes_per_cell_host()/1048576ull); // reset to get valid values for consecutive simulations
gpu_mem_required = lbm->lbm_domain[0]->get_device().info.memory_used;
bool all_domains_use_ram = true; // reset cpu/gpu_mem_required to get valid values for consecutive simulations
for(uint d=0u; d<lbm->get_D(); d++) {
all_domains_use_ram = all_domains_use_ram&&lbm->lbm_domain[d]->get_device().info.uses_ram;
}
if(all_domains_use_ram) {
cpu_mem_required = lbm->get_D()*lbm->lbm_domain[0]->get_device().info.memory_used;
gpu_mem_required = 0u;
} else {
cpu_mem_required = (uint)(lbm->get_N()*(ulong)bytes_per_cell_host()/1048576ull);
gpu_mem_required = lbm->lbm_domain[0]->get_device().info.memory_used;
}
const float Re = lbm->get_Re_max();
println("|-----------------.-----------------------------------------------------------|");
println("| Grid Resolution | "+alignr(57u, to_string(lbm->get_Nx())+" x "+to_string(lbm->get_Ny())+" x "+to_string(lbm->get_Nz())+" = "+to_string(lbm->get_N()))+" |");
Expand Down
4 changes: 2 additions & 2 deletions src/lbm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1259,8 +1259,8 @@ void LBM_Domain::allocate_transfer(Device& device) { // allocate all memory for
if(Dy>1u) Amax = max(Amax, (ulong)Nz*(ulong)Nx); // Ay
if(Dz>1u) Amax = max(Amax, (ulong)Nx*(ulong)Ny); // Az

transfer_buffer_p = Memory<char>(device, Amax, max(transfers*(uint)sizeof(fpxx), 17u)); // only allocate one set of transfer buffers in plus/minus directions, for all x/y/z transfers
transfer_buffer_m = Memory<char>(device, Amax, max(transfers*(uint)sizeof(fpxx), 17u));
transfer_buffer_p = Memory<char>(device, Amax, max(transfers*(uint)sizeof(fpxx), 17u), true, true, 0, false); // only allocate one set of transfer buffers in plus/minus directions, for all x/y/z transfers
transfer_buffer_m = Memory<char>(device, Amax, max(transfers*(uint)sizeof(fpxx), 17u), true, true, 0, false); // these transfer buffers must not be zero-copy!

kernel_transfer[enum_transfer_field::fi ][0] = Kernel(device, 0u, "transfer_extract_fi" , 0u, t, transfer_buffer_p, transfer_buffer_m, fi);
kernel_transfer[enum_transfer_field::fi ][1] = Kernel(device, 0u, "transfer__insert_fi" , 0u, t, transfer_buffer_p, transfer_buffer_m, fi);
Expand Down
2 changes: 1 addition & 1 deletion src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ void main_label(const double frametime) {
draw_label(ox, oy+i, "Steps " +alignr(31u, /************************************/ alignr(10u, info.lbm->get_t())+" ("+alignr(5, to_uint(1.0/info.runtime_lbm_timestep_smooth))+" Steps/s)"), c); i+=FONT_HEIGHT;
draw_label(ox, oy+i, "FPS " +alignr(33u, /************************************************************/ alignr(4u, to_uint(1.0/frametime))+" ("+alignr(5u, camera.fps_limit)+" fps max)"), c);
}
draw_label(2, camera.height-1*(FONT_HEIGHT)-1, "FluidX3D v2.19 Copyright (c) Dr. Moritz Lehmann", c);
draw_label(2, camera.height-1*(FONT_HEIGHT)-1, "FluidX3D v3.0 Copyright (c) Dr. Moritz Lehmann", c);
if(!key_H) {
draw_label(camera.width-16*(FONT_WIDTH)-1, 2, "Press H for Help", c);
} else {
Expand Down
Loading

0 comments on commit 81d8047

Please sign in to comment.