Examples of using catLGM

These demos illustrate the use of the catLGM.m function to solve a variety of machine learning problems. The code used to generate these demos can be downloaded here.

To run these demos, change directory to catLGM/ and run addpath(genpath(pwd)) so that the toolbox and all of its subdirectories are on the MATLAB path.
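
For example, from the MATLAB prompt:

    cd catLGM                % toolbox root
    addpath(genpath(pwd));   % add the toolbox and all subdirectories to the path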

Return to catLGM main page

Contents

    Binary Classification with Bayesian Logistic Regression
    Multi-Class Classification using Bayesian Multinomial-Logit Regression
    Multi-Class Gaussian Process Classification
    Binary Latent Gaussian Graphical Model
    Categorical Factor Analysis
    Missing Data Imputation

Binary Classification with Bayesian Logistic Regression

    clear all;
    setSeed(1)
    % generate data
    N = 5000; Ntrain = 4000;
    X = 2*rand(N, 2)-1;
    y = (sigmoid(X*[1; -1]))>0.5;
    y = ~y + 1; % Always specify binary data in [1 2] coding instead of [1 0]
    idx = randperm(N);
    ytrain = y(idx(1:Ntrain)); Xtrain = X(idx(1:Ntrain),:);
    ytest = y(idx(4950:end)); Xtest = X(idx(4950:end),:); % a small held-out test set, disjoint from the training indices
    ytest_bit = bsxfun(@eq, ytest, [1 2]);

    % specify model, algorithm and options
    model = 'bayesLogitReg';
    algos = {'var-boh','var-log','var-pw'};
    options = struct( 'maxItersInfer', 100, 'lowerBoundTolInfer', 0.001, 'displayInfer', 0);

    % Run each algorithm
    for i = 1:length(algos)
      % inference and prediction
      postDist{i} = catLGM(model, 'infer', algos{i},  [Xtrain ytrain], options);
      pred(:,:,i) = catLGM(model, 'predict', 'mc',  Xtest, [], postDist{i});

      % compute cross entropy error and store
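      % average test cross-entropy in bits: -(1/Ntest) * sum_n sum_k [y_n = k] * log2 p_k(x_n)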
      predErr(i) = -sum(sum(ytest_bit.*log2(pred(:,:,i)),2),1)/length(ytest);
      predProb(:,i) = pred(:,1,i);
      logLik(i) = postDist{i}.logLik;
      w(:,i) = postDist{i}.mean;
    end

    % least-squares baseline with (+1,-1) encoding
    yc = -(2*ytrain-3); % map class 1 -> +1, class 2 -> -1
    w_ls = Xtrain \ yc; % least-squares solution via backslash, avoiding an explicit inv(X'*X)
    w(:,end+1) = w_ls;
    p_ls = sigmoid(Xtest*w_ls);
    p_ls = [p_ls 1-p_ls];
    predErr(end+1) = -sum(sum(ytest_bit.*log2(p_ls),2),1)/length(ytest);
    predProb(:,end+1) = p_ls(:,1);
    algos{end+1} = 'least-square';
    logLik(end+1) = NaN;

    % display
    algos
    fprintf('Estimated Weights: \n '); w
    fprintf('Prediction error\n '); predErr
    fprintf('Training log-lik\n '); logLik

    % plot
    [~,idx] = sort(ytest); % order test points by true class for plotting
    figure(1); clf;
    colors = {'r','g','k','c'};
    for i = 1:length(algos)
      subplot(2,2,i);
      stem(predProb(idx,i), 'color', colors{i});
      hold on;
      plot(ytest_bit(idx,1),'.','linewidth',4);
      xlim([0 length(ytest)+1]); ylim([-0.3 1.3]);
      title(sprintf('Prediction Prob %s', algos{i}));
    end
algos = 

    'var-boh'    'var-log'    'var-pw'    'least-square'

Estimated Weights: 
 
w =

    9.4560   11.0330   11.2652    1.0153
   -9.4208  -11.0141  -11.2466   -0.9932

Prediction error
 
predErr =

    0.2089    0.1847    0.1812    0.7174

Training log-lik
 
logLik =

 -420.2152 -416.5018 -450.3729       NaN
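
Since the demo reports cross-entropy, the hard 0/1 error rate is also easy to read off the stored class-1 probabilities. A minimal sketch (yhat and errRate are new variables, not part of the demo script), to be run after the loop above:

    % 0/1 error rate per algorithm: predict class 1 when p(y=1) >= 0.5
    for i = 1:size(predProb,2)
      yhat = (predProb(:,i) < 0.5) + 1;   % 1 if p >= 0.5, otherwise 2
      errRate(i) = mean(yhat ~= ytest);
    end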

Multi-Class Classification using Bayesian Multinomial-Logit Regression

    clear all;
    % generate data
    N = 1000; F = 5; nClass = 4;
    X = 2*rand(N, F)-1;
    beta = randn(nClass,F);
    eta = X*beta';
    lse = logsumexp(eta,2);
    prob = bsxfun(@minus, eta, lse);
    prob = exp(prob);
    [~,y] = max(prob,[],2);
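    % taking the argmax of the class probabilities makes the labels deterministic given X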
    Ntest = 50; Ntrain = ceil(0.8*N);
    idx = randperm(N);
    ytrain = y(idx(1:Ntrain));
    Xtrain = X(idx(1:Ntrain),:);
    ytest = y(idx(N-Ntest+1:end));
    Xtest = X(idx(N-Ntest+1:end),:);
    ytest_bit = bsxfun(@eq, ytest, 1:nClass);

    % specify model, algorithm and options
    model = 'bayesMultiLogitReg';
    algos = {'var-boh','var-log'};
    options = struct('maxItersInfer', 1000, 'lowerBoundTolInfer', 1e-5, 'displayInfer', 0);

    % Run each algorithm
    for i = 1:length(algos)
      postDist{i} = catLGM(model, 'infer', algos{i},  [Xtrain ytrain], options);
      predProb(:,:,i) = catLGM(model, 'predict', 'mc',  Xtest, [], postDist{i});

      % compute cross entropy error and store
      predErr(i) = -sum(sum(ytest_bit.*log2(predProb(:,:,i)),2),1)/length(ytest);
      logLik(i) = postDist{i}.logLik;
      w(:,i) = postDist{i}.mean;
    end

    % display
    algos
    w
    predErr
    logLik

    % plot
    figure(1); clf;
    nr = 1; nc = size(predProb,3) + 1;
    subplot(nr,nc,1);
    imagesc(ytest_bit); colorbar; caxis([0 1]);
    title('test data');
    for i = 1:size(predProb,3)
      subplot(nr,nc,i+1);
      imagesc(predProb(:,:,i)); colorbar; caxis([0 1]);
      title(sprintf('predProb %s', algos{i}));
    end
algos = 

    'var-boh'    'var-log'


w =

    5.2856    5.2865
   -0.5911   -0.5971
   -3.0405   -3.0356
   -1.1945   -1.1917
    0.5754    0.5728
   -4.4748   -4.5063
   -4.0890   -4.1053
   -2.2287   -2.2379
    1.0797    1.0892
   -0.9744   -0.9858
    3.9102    3.9116
   -7.2525   -7.2706
    0.3131    0.3162
   -3.6087   -3.6150
    2.1769    2.1784


predErr =

    0.3414    0.3413


logLik =

 -303.3138 -301.7505
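
To see which classes get confused with which, a confusion matrix can be built from the hard predictions. A minimal sketch for the first algorithm (yhat and confMat are new variables introduced here), to be run after the loop above:

    % rows index the true class, columns the predicted class
    [~, yhat] = max(predProb(:,:,1), [], 2);
    confMat = accumarray([ytest yhat], 1, [nClass nClass])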

Multi-Class Gaussian Process Classification

    clear all;
    % generate data
    N = 1000; F = 2; nClass = 4;
    X = randn(N, F);
    beta = [0 0; 1 0; 0 1; -1 1];
    eta = X*beta';
    lse = logsumexp(eta,2);
    prob = bsxfun(@minus, eta, lse);
    prob = exp(prob);
    [~,y] = max(prob,[],2);
    Ntrain = ceil(N*0.1);
    idx = randperm(N);
    ytrain = y(idx(1:Ntrain));
    Xtrain = X(idx(1:Ntrain),:);
    ytest = y(idx(950:end));   % small held-out test set, disjoint from the 100 training points
    Xtest = X(idx(950:end),:);
    ytest_bit = bsxfun(@eq, ytest, 1:nClass);

    % mean and covariance functions and their hyper-parameters
    params.meanfunc = {@meanZero};
    params.covfunc = {@covSEiso};
    params.hyp.mean = [];
    params.hyp.cov = ones(nClass-1,2);
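    % one row of hyperparameters per latent GP; for covSEiso these are [log(lengthscale) log(signal std)] in the GPML convention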
    params.others.nClass = nClass;

    % Specify model and algorithm
    models = {'multiLogitGP', 'multiLogitGP', 'stickLogitGP'};
    algos = {'var-boh', 'var-log', 'var-pw'};
    options = struct('maxItersInfer', 1000, 'lowerBoundTolInfer', 1e-5, 'displayInfer', 0);

    % Run each algorithm
    for i = 1:length(algos)
      inferOut{i} = catLGM(models{i}, 'infer', algos{i}, [Xtrain ytrain], options, params);
      predProb(:,:,i) = catLGM(models{i}, 'predict', 'mc',  Xtest, [], inferOut{i});
      predErr(i) = computeCrossEntropy(predProb(:,:,i)', ytest, nClass);
      logLik(i) = inferOut{i}.logLik;
    end

    % display
    algos
    predErr
    logLik

    % plot
    figure(2); clf;
    nr = 1; nc = size(predProb,3) + 1;
    subplot(nr,nc,1);
    imagesc(ytest_bit); colorbar; caxis([0 1]);
    title('test data');
    for i = 1:size(predProb,3)
      subplot(nr,nc,i+1);
      imagesc(predProb(:,:,i)); colorbar; caxis([0 1]);
      title(sprintf('predProb %s', algos{i}));
    end
algos = 

    'var-boh'    'var-log'    'var-pw'


predErr =

    0.6974    0.6864    0.7562


logLik =

  -82.1167  -75.2715  -75.8875

Binary Latent Gaussian Graphical Model

    clear all;
    % generate data
    D=5;
    mean_ = [2 1 0.5 0 0]';
    X1 = [1 1 1; 0 0 0]; X2 = [0 0; 0 1; 1 0; 1 1];
    X=[X1(repmat(1:2,4,1),:), repmat(X2,[2,1])];
    C=7*cov(X)+eye(D);
    covMat = C;
    S  = 100000;
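    % generative model: z ~ N(mean_, C); y_d = 1 exactly when sigmoid(z_d) exceeds a uniform draw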
    z  = mvnrnd(mean_,C,S);
    az = sigmoid(z); % sigmoid is elementwise, so no transposes are needed
    Y = (az>rand(S,D))';

    % Always change binary data to [1 2] coding instead of [1 0]
    Y = ~Y + 1;

    % specify model, algos and options
    model = 'binaryLogitLGGM';
    algos = {'var-boh','var-log','var-pw'};
    options = struct('lowerBoundTolLearn', 0.01,'displayLearn',0);
    nIters = [200 50 50];

    % run each algorithm
    for i = 1:length(algos)
      options.maxItersLearn = nIters(i);
      out{i} = catLGM(model, 'learn', algos{i},  Y, options);
      mean_(:,i+1) = out{i}.params.mean;
      covMat(:,:,i+1) = out{i}.params.covMat;
    end

    % plot
    figure(1); clf;
    h(1) = semilogx(out{1}.time, out{1}.logLik, 'ro-', 'linewidth',2);
    hold on
    h(2) = semilogx(out{2}.time, out{2}.logLik, 'bs-', 'linewidth',2);
    h(3) = semilogx(out{3}.time, out{3}.logLik, 'gx-', 'linewidth',2);
    xlim([0.05 100]);
    grid on;
    legend(h, algos, 'location', 'southeast');
    xlabel('time (s)');
    ylabel('Lower bound to logLik');
    fprintf('Estimated mean\n');
    ['truth' algos]
    mean_
    figure(2); clf;
    nr = 2; nc = 2;
    for i = 1:size(mean_,2)
      subplot(nr,nc,i)
      imagesc(covMat(:,:,i)); colorbar; caxis([0 max(C(:))]);
      if i ~= 1
        title(sprintf('Est CovMat - %s', algos{i-1}));
      else
        title('True CovMat');
      end
    end
Estimated mean

ans = 

    'truth'    'var-boh'    'var-log'    'var-pw'


mean_ =

    2.0000    1.3461    0.9507    1.6454
    1.0000    0.6766    0.5080    0.9178
    0.5000    0.3350    0.2318    0.4774
         0    0.0015   -0.0982   -0.0005
         0    0.0052   -0.0949    0.0035

Categorical Factor Analysis

    clear all
    % D dimensions with M categories and N data points
    D = 2; M = 4;
    nC = M*ones(D,1);
    N = 10000;
    L = (M-1)*D;
    X = [eye(M-1) eye(M-1)];
    C = 20*cov(X) + eye(D*(M-1));
    covMat = C;
    mean_ = zeros(L,1);
    z = mvnrnd(mean_(:)', covMat, N);
    for d = 1:D
      idx = [1:M-1] + (d-1)*(M-1);
      eta_d = [z(:,idx)'; zeros(1,N)];
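      % the appended zero row makes category M the reference class (eta = 0), the usual multinomial-logit identifiability constraint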
      lse = logsumexp(eta_d,1);
      prob = exp(bsxfun(@minus, eta_d, lse));
      [~, Y(:,d)] = max(prob);
    end
    Y = Y';

    % compute true data distribution
    [Yt,~,J] = unique(Y','rows');
    dist(1,:) = hist(J,1:size(Yt,1))/size(Y,2);

    % specify models, and corresponding algos and options
    models = {'multiLogitFA','multiLogitFA','stickLogitFA'};
    algos = {'var-boh','var-log','var-pw'};
    options = struct('lowerBoundTolLearn', 0.01,...
                     'displayLearn',0,...
                     'maxItersInfer',10,...
                     'maxItersLearn', 100,...
                     'latentDim', 2);

    % run algorithms
    for i = 1:length(algos)
      out{i} = catLGM(models{i}, 'learn', algos{i},  Y, options);
      mean_ = out{i}.params.mean;
      covMat = out{i}.params.covMat;
      W = out{i}.params.loadingMat;
      bias = out{i}.params.bias;
      [dist(i+1,:), Y1] = computeCatDist_mc(models{i}, mean_, covMat, W, bias, nC);
    end

    % plot
    figure(1);
    nr = 2; nc = 2;
    subplot(nr,nc,1)
    bh = bar(dist(1,:), 'facecolor', 'k', 'edgecolor', 'k');
    ylim([0 .30]); xlim([0 17]);
    grid on;
    hx = xlabel('Bit Patterns');
    hy = ylabel('Probability');
    ht = title('truth');
    for i = 2:length(algos)+1
      subplot(nr,nc,i)
      bh = bar(dist(i,:), 'facecolor', 'k', 'edgecolor', 'k');
      ylim([0 .3]); xlim([0 17]);
      grid on;
      hx = xlabel('Bit Patterns');
      hy = ylabel('Probability');
      ht = title(algos{i-1});
    end

Missing Data Imputation

    clear all; close all;
    % generate data
    Y = [ repmat([1; 1; 4; 1; 4], 1, 30)...
          repmat([2; 2; 3; 3; 2], 1, 30)...
          repmat([3; 3; 2; 2; 3], 1, 30)...
          repmat([2; 1; 1; 2; 1], 1, 30)];
    nC = [3; 3; 4; 3; 4];
    Ybitvec = encodeDataOneOfM(Y, nC, 'M');
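    % one-of-M encoding: each categorical dimension becomes a block of indicator bits, stacked across dimensions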
    [D,N] = size(Y);

    % create one missing entry in part of the data
    d = unidrnd(D,1,N); % one missing dim in each data point
    ratio = 0.5; % ratio of missing data examples
    Ymiss = Y;
    nMiss = 0;
    for i = 1:size(Y,2)
      if rand < ratio
        Ymiss(d(i),i) = NaN;
        nMiss = nMiss + 1; % exactly one missing entry per selected example
      end
    end

    % specify models, algorithms, and options for imputation
    models = {'multiLogitFA', 'multiLogitFA', 'stickLogitFA'};
    algos = {'var-boh', 'var-log', 'var-pw'};
    colors = {'bx-','ko-','rs-'};
    nIters = [35 12 3];
    options = struct('maxItersLearn', 5,...
                     'lowerBoundTolLearn', 0.001,...
                     'displayLearn',0,...
                     'maxItersInfer',10,...
                     'maxItersMstep',5,...
                     'latentDim',5);

    % plot true data
    figure(2); clf;
    nr = 1; nc = length(algos)+1;
    subplot(nr,nc,1);
    imagesc(Ybitvec');
    title('true data');
    figure(1); clf;

    % data imputation
    for m = 1:length(algos)
      fprintf('EM for Parameter learning\n');
      params = [];
      initVec = [];
      algos{m} % no terminating semicolon, so the current algorithm name is printed
      for iter = 1:nIters(m)
        % warm start from previous iterations of EM
        options.initVec = initVec;

        % learn model
        learnOut = catLGM(models{m}, 'learn', algos{m}, Ymiss, options, params);

        % read output
        params = learnOut.params; % learned parameter
        postDist = learnOut.postDist; % post Dist
        initVec = learnOut.ss.initVec; % for next iteration
        trLogLik(iter) = learnOut.logLik(end);
        tt(iter) = learnOut.time(end);

        % impute data
        pred = catLGM(models{m}, 'impute', 'mc', Ymiss, [], params, postDist);
        err(iter) = -sum(sum(Ybitvec.*log2(max(pred,eps))))/nMiss;
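        % max(pred,eps) guards against log2(0) when a predicted probability underflows to zero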

        % plot
        figure(1);
        subplot(2,1,1);
        hold on
        h1(m) = semilogx(cumsum(tt), trLogLik, colors{m}, 'linewidth',2);
        grid on;
        title('train log lik');
        subplot(2,1,2);
        hold on
        grid on;
        semilogx(cumsum(tt), err, colors{m}, 'linewidth',2);
        title('test error');
        drawnow;
        figure(2);
        subplot(nr,nc,m+1);
        imagesc(pred');
        title(sprintf('imputation-%s', algos{m}));
        drawnow;
      end
      clear err trLogLik tt;
    end

    figure(1); subplot(2,1,1);
    legend(h1, algos, 'location', 'southeast');
EM for Parameter learning

ans =

var-boh

EM for Parameter learning

ans =

var-log

EM for Parameter learning

ans =

var-pw