Thursday, 1 May 2014

Belief propagation for stereo tutorial

1. Loopy belief propagation, Markov Random Field, stereo vision
A very nice and detailed tutorial about using belief propagation and MRF to solve the stereo problem. Some maths have been presented clearly and simply. A self-contained belief propagation code for stereo is attached (main.cpp ).

2. Stereovision reconstruction system
A project implementing paper: "Segment-Based Stereo Matching Using Belief Propagation and a Self-Adapting Dissimilarity Measure (ICPR 2006)" using OpenCV.

Loopy Belief Propagation code:

A MRF is a grid of node with same size as the image. For each node in the MRF, 5 link is construct to its neighbour and each link contains 'number of label' cells for storing massage.
 
struct Pixel
{
    // Each pixel has 5 'message box' to store incoming data
    TYPE msg[5][LABELS];
    int best_assignment;
};

struct MRF2D
{
    std::vector  grid;
    int width, height;
};

For every input image pair, we need to initialize the MRF correspondingly. For all the message at the very beginning stage, all message are initialized to 0 (or 1 depends on optimization) which means each node does not have any opinion about its neighbors. For the messages of each node itself, we initialize them based on the DataCost function. The DataCost function can be as simple as the intensity difference.

void InitDataCost(const std::string &left_file, const std::string &right_file, MRF2D &mrf)
{
    // Cache the data cost results so we don't have to recompute it every time

    // Force greyscale
    cv::Mat left = cv::imread(left_file.c_str(), 0);
    cv::Mat right = cv::imread(right_file.c_str(), 0);

    if(!left.data) {
        cerr << "Error reading left image" << endl;
        exit(1);
    }

    if(!right.data) {
        cerr << "Error reading right image" << endl;
        exit(1);
    }

    assert(left.channels() == 1);

    mrf.width = left.cols;
    mrf.height = left.rows;

    int total = mrf.width*mrf.height;

    mrf.grid.resize(total);

    // Initialise all messages to zero
    for(int i=0; i < total; i++) {
        for(int j=0; j < 5; j++) {
            for(int k=0; k < LABELS; k++) {
                mrf.grid[i].msg[j][k] = 0;
            }
        }
    }

    // Add a border around the image
    int border = LABELS;

    for(int y=border; y < mrf.height-border; y++) {
        for(int x=border; x < mrf.width-border; x++) {
            for(int i=0; i < LABELS; i++) {
                mrf.grid[y*left.cols+x].msg[DATA][i] = DataCostStereo(left, right, x, y, i);
            }
        }
    }
}

For propagating the message at each node to different direction (LEFT/RIGHT/UP/DOWN), 'SendMsg' function iterate every possible labels l for the corresponding link (each link contains multiple messages). For each label,we calculate the minimum cost/energy (Min-Sum BP) out of all possible labels l'. This minimum cost/energy is recorded for that label l of the link. Then use this minimum cost to update the message of the neighbor node so that link A-B and B-A has same message now.

void SendMsg(MRF2D &mrf, int x, int y, DIRECTION direction)
{
    TYPE new_msg[LABELS];

    int width = mrf.width;

    for(int i=0; i < LABELS; i++) {
        TYPE min_val = UINT_MAX;

        for(int j=0; j < LABELS; j++) {
            TYPE p = 0;

            p += SmoothnessCost(i,j);
            p += mrf.grid[y*width+x].msg[DATA][j];

            // Exclude the incoming message direction that we are sending to
            if(direction != LEFT) p += mrf.grid[y*width+x].msg[LEFT][j];
            if(direction != RIGHT) p += mrf.grid[y*width+x].msg[RIGHT][j];
            if(direction != UP) p += mrf.grid[y*width+x].msg[UP][j];
            if(direction != DOWN) p += mrf.grid[y*width+x].msg[DOWN][j];

            min_val = std::min(min_val, p);
        }

        new_msg[i] = min_val;
    }

    for(int i=0; i < LABELS; i++) {
        switch(direction) {
            case LEFT:
            mrf.grid[y*width + x-1].msg[RIGHT][i] = new_msg[i];
            break;

            case RIGHT:
            mrf.grid[y*width + x+1].msg[LEFT][i] = new_msg[i];
            break;

            case UP:
            mrf.grid[(y-1)*width + x].msg[DOWN][i] = new_msg[i];
            break;

            case DOWN:
            mrf.grid[(y+1)*width + x].msg[UP][i] = new_msg[i];
            break;

            default:
            assert(0);
            break;
        }
    }
}
For each BP iteration we propagate the message from Left-Right, Right-Left, Up-Down and Down-Up.

void BP(MRF2D &mrf, DIRECTION direction)
{
    int width = mrf.width;
    int height = mrf.height;

    switch(direction) {
        case RIGHT:
        for(int y=0; y < height; y++) {
            for(int x=0; x < width-1; x++) {
                SendMsg(mrf, x, y, direction);
            }
        }
        break;

        case LEFT:
        for(int y=0; y < height; y++) {
            for(int x=width-1; x >= 1; x--) {
                SendMsg(mrf, x, y, direction);
            }
        }
        break;

        case DOWN:
        for(int x=0; x < width; x++) {
            for(int y=0; y < height-1; y++) {
                SendMsg(mrf, x, y, direction);
            }
        }
        break;

        case UP:
        for(int x=0; x < width; x++) {
            for(int y=height-1; y >= 1; y--) {
                SendMsg(mrf, x, y, direction);
            }
        }
        break;

        case DATA:
        assert(0);
        break;
    }
}


At last, we assign each node (pixel) with the label that minimize the global energy.

TYPE MAP(MRF2D &mrf)
{
    // Finds the MAP assignment as well as calculating the energy

    // MAP assignment
    for(size_t i=0; i < mrf.grid.size(); i++) {
        TYPE best = std::numeric_limits::max();
        for(int j=0; j < LABELS; j++) {
            TYPE cost = 0;

            cost += mrf.grid[i].msg[LEFT][j];
            cost += mrf.grid[i].msg[RIGHT][j];
            cost += mrf.grid[i].msg[UP][j];
            cost += mrf.grid[i].msg[DOWN][j];
            cost += mrf.grid[i].msg[DATA][j];

            if(cost < best) {
                best = cost;
                mrf.grid[i].best_assignment = j;
            }
        }
    }

    int width = mrf.width;
    int height = mrf.height;

    // Energy
    TYPE energy = 0;

    for(int y=0; y < mrf.height; y++) {
        for(int x=0; x < mrf.width; x++) {
            int cur_label = mrf.grid[y*width+x].best_assignment;

            // Data cost
            energy += mrf.grid[y*width+x].msg[DATA][cur_label];

            if(x-1 >= 0)     energy += SmoothnessCost(cur_label, mrf.grid[y*width+x-1].best_assignment);
            if(x+1 < width)  energy += SmoothnessCost(cur_label, mrf.grid[y*width+x+1].best_assignment);
            if(y-1 >= 0)     energy += SmoothnessCost(cur_label, mrf.grid[(y-1)*width+x].best_assignment);
            if(y+1 < height) energy += SmoothnessCost(cur_label, mrf.grid[(y+1)*width+x].best_assignment);
        }
    }

    return energy;
}

Main program which calls above function:
int main()
{
    MRF2D mrf;

    InitDataCost("tsukuba-imL.png", "tsukuba-imR.png", mrf);

    for(int i=0; i < BP_ITERATIONS; i++) {
        BP(mrf, RIGHT);
        BP(mrf, LEFT);
        BP(mrf, UP);
        BP(mrf, DOWN);

        TYPE energy = MAP(mrf);
    }

    cv::Mat output = cv::Mat::zeros(mrf.height, mrf.width, CV_8U);

    for(int y=LABELS; y < mrf.height-LABELS; y++) {
        for(int x=LABELS; x < mrf.width-LABELS; x++) {
            // Increase the intensity so we can see it
            output.at(y,x) = mrf.grid[y*mrf.width+x].best_assignment * (256/LABELS);
        }
    }

    cv::namedWindow("main", CV_WINDOW_AUTOSIZE);
    cv::imshow("main", output);
    cv::waitKey(0);

    cout << "Saving results to output.png" << endl;
    cv::imwrite("output.png", output);

    return 0;
}

No comments:

Post a Comment