116 lines
4.5 KiB
Plaintext
116 lines
4.5 KiB
Plaintext
/***********************************************************************
  Copyright (C) 2019 Hironori Fujimoto

  This program is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program. If not, see <http://www.gnu.org/licenses/>.
***********************************************************************/
|
|
|
|
#include "flightlib/sensors/sgm_gpu/costs.h"
|
|
#include <stdio.h>
|
|
|
|
namespace sgm_gpu
|
|
{
|
|
|
|
// Builds the center-symmetric census descriptor of one pixel from a tile.
//
// `tile` is a row-major byte tile with `tile_cols` columns; (top_row, left_col)
// is the top-left corner of the pixel's neighbourhood inside that tile.
// Bit k*CENSUS_WIDTH+m of the result is set when the sample at neighbourhood
// offset (k, m) is >= the sample at the point-mirrored offset
// (2*TOP-k, 2*LEFT-m). Only the rows above the middle row, plus the left half
// of the middle row when CENSUS_HEIGHT is odd, are visited, so every mirrored
// pair is compared once.
static __device__ __forceinline__ cost_t
CenterSymmetricCensusFromTile(const uint8_t *tile, const int tile_cols,
                              const int top_row, const int left_col) {
  cost_t descriptor = 0;

  // Rows strictly above the middle row: compare every column with its mirror.
  for (int k = 0; k < CENSUS_HEIGHT/2; k++) {
    for (int m = 0; m < CENSUS_WIDTH; m++) {
      const uint8_t sample   = tile[(top_row+k)*tile_cols+left_col+m];
      const uint8_t mirrored = tile[(top_row+2*TOP-k)*tile_cols+left_col+2*LEFT-m];

      // Comparison result (0 or 1) moved to this pair's bit position.
      cost_t bit = (sample >= mirrored);
      bit <<= k*CENSUS_WIDTH+m;
      descriptor |= bit;
    }
  }

  // Middle row (odd heights only): the mirror lies in the same row, so only
  // the columns left of the center are needed.
  if (CENSUS_HEIGHT % 2 != 0) {
    const int k = CENSUS_HEIGHT/2;
    for (int m = 0; m < CENSUS_WIDTH/2; m++) {
      const uint8_t sample   = tile[(top_row+k)*tile_cols+left_col+m];
      const uint8_t mirrored = tile[(top_row+2*TOP-k)*tile_cols+left_col+2*LEFT-m];

      cost_t bit = (sample >= mirrored);
      bit <<= k*CENSUS_WIDTH+m;
      descriptor |= bit;
    }
  }

  return descriptor;
}

// Center-symmetric census transform of two images of identical size.
//
// Launch layout: 2D grid of 2D blocks, one thread per output pixel
// (x -> column, y -> row). The shared tiles are sized for blocks of at most
// 32x32 threads; larger block dimensions would index past the tiles.
// Static shared memory: two tiles of (32+2*LEFT) x (32+2*TOP) bytes.
//
// Parameters:
//   im, im2               row-major 8-bit input images, rows x cols each
//   transform, transform2 row-major outputs, rows x cols each; receive the
//                         descriptors of im and im2 respectively
//   rows, cols            image dimensions in pixels
//
// Pixels outside the image are treated as 0 when they fall inside a
// neighbourhood.
__global__ void
__launch_bounds__(1024, 2)
CenterSymmetricCensusKernelSM2(const uint8_t *im, const uint8_t *im2, cost_t *transform, cost_t *transform2, const uint32_t rows, const uint32_t cols) {
  const int idx = blockIdx.x*blockDim.x+threadIdx.x;
  const int idy = blockIdx.y*blockDim.y+threadIdx.y;

  const int win_cols = (32+LEFT*2); // 32 block columns plus a LEFT-wide halo on each side
  const int win_rows = (32+TOP*2);  // 32 block rows plus a TOP-high halo on each side

  __shared__ uint8_t window[win_cols*win_rows];
  __shared__ uint8_t window2[win_cols*win_rows];

  // Linear thread id inside the block and the number of threads in the block.
  const int id = threadIdx.y*blockDim.x+threadIdx.x;
  const int block_size = blockDim.x*blockDim.y;

  // Image coordinates of the tile's top-left element (negative at the image
  // border, where the halo sticks out of the image).
  const int tile_row0 = static_cast<int>(blockIdx.y*blockDim.y)-TOP;
  const int tile_col0 = static_cast<int>(blockIdx.x*blockDim.x)-LEFT;

  // Cooperative tile load. The tile has more elements than the block has
  // threads, so each thread strides through it until every element is
  // written, whatever the block size is.
  for (int i = id; i < win_cols*win_rows; i += block_size) {
    const int sm_row = i / win_cols;
    const int sm_col = i % win_cols;

    const int im_row = tile_row0+sm_row;
    const int im_col = tile_col0+sm_col;

    // The sign checks come first so the unsigned comparisons below only ever
    // see non-negative values.
    const bool boundaries = (im_row >= 0 && im_col >= 0 && im_row < rows && im_col < cols);
    window[i]  = boundaries ? im[im_row*cols+im_col] : 0;
    window2[i] = boundaries ? im2[im_row*cols+im_col] : 0;
  }

  // Every thread reaches this barrier; the tiles are complete afterwards.
  __syncthreads();

  // Threads mapped outside the image helped with the load but write nothing.
  if (idy < rows && idx < cols) {
    // The neighbourhood of this thread's pixel starts at (threadIdx.y,
    // threadIdx.x) in tile coordinates because of the halo offset.
    transform[idy*cols+idx]  = CenterSymmetricCensusFromTile(window,  win_cols, threadIdx.y, threadIdx.x);
    transform2[idy*cols+idx] = CenterSymmetricCensusFromTile(window2, win_cols, threadIdx.y, threadIdx.x);
  }
}
|
|
|
|
} // namespace sgm_gpu
|
|
|