The standard matrix multiplication for an n x n matrix results in a time complexity with a factor of 3. This means if you double the size of the matrix, you'll require eight times more time. Strassen's algorithm is one technique that optimizes matrix multiplication, reducing time complexity to a factor of 2.8.
The algorithm is named after the German mathematician Volker Strassen, who introduced it.
Conditions
- The matrices should be square.
- Matrix size has to be a power of two. If the matrices do not meet these constraints, they are padded with rows and columns of zeros.
How it Works
- If the matrices do not meet the above conditions, pad with rows and columns of zeros to get nice square matrices.
- Check if the matrix size is smaller than the threshold. If so use the standard multiplication.
- If not, proceed to Strassen's algorithm.
- Partition matrices into four equal size blocks.
- Use these partitions to calculate the seven matrices.
- Using the seven matrices, calculate the final matrix.
- Remove additional rows and columns (which are a result of padded zeros) to get the final matrix.
Pros
The final matrix is calculated in seven multiplication steps, whereas the standard method uses eight multiplications. This results in reduced time complexity, hence optimizing multiplication.
Cons
The threshold at which the algorithm falls back to the standard method has a significant effect in optimization, particularly when matrices of varying sizes need to be calculated. Threshold measures that work for large matrices may not work for smaller matrices.
Since most matrices usually require to be padded with zeros prior to applying the algorithm, the matrix size may become very large, hence increasing the time complexity.
Implementation
Here is a C++ implementation of Strassen's algorithm, using it to multiply matrices of size 1000. The results can be compared with those from standard multiplication.
//optimized matrix multiplication using Strassen algorithm
#include <iostream>
#include <ctime>
#include <cstdlib>
#include <vector>
#include <cmath>
using namespace std;
//size at which the sequential multiplication is used instead of recursive Strassen
int thresholdSize = 128;
void initMat(vector< vector<double> > &a, vector< vector<double> > &b, int n)
{
// initialize matrices and fill them with random values
for (int i = 0; i < n; ++i) {
for (int j = 0; j < n; ++j) {
a[i][j] = (double)rand()/RAND_MAX*10;
b[i][j] = (double)rand()/RAND_MAX*10;
}
}
}
void multiplyMatStandard(vector< vector<double> > &a, vector< vector<double> > &b, vector< vector<double> > &c, int n)
{
// standard matrix multiplication: C <- C + A x B
for (int i = 0; i < n; ++i) {
for (int j = 0; j < n; ++j) {
double temp = 0;
for (int k = 0; k < n; ++k) {
temp += a[i][k] * b[k][j];
}
c[i][j]=temp;
}
}
}
int getNextPowerOfTwo(int n)
{
return pow(2, int(ceil(log2(n))));
}
void fillZeros(vector< vector<double> > &newA, vector< vector<double> > &newB,
vector< vector<double> > &a, vector< vector<double> > &b, int n)
{
//pad matrix with zeros
for (int i=0; i<n; i++){
for (int j=0; j<n; j++){
newA[i][j] = a[i][j];
newB[i][j] = b[i][j];
}
}
}
void add(vector< vector<double> > &a, vector< vector<double> > &b,
vector< vector<double> > &resultMatrix, int n)
{
for(int i=0; i<n; i++){
for(int j=0; j<n; j++){
resultMatrix[i][j] = a[i][j] + b[i][j];
}
}
}
void subtract(vector< vector<double> > &a, vector< vector<double> > &b,
vector< vector<double> > &resultMatrix, int n)
{
for(int i=0; i<n; i++){
for(int j=0; j<n; j++){
resultMatrix[i][j] = a[i][j] - b[i][j];
}
}
}
void multiplyStrassen(vector< vector<double> > &a,
vector< vector<double> > &b, vector< vector<double> > &c, int n)
{
if(n<=thresholdSize) {
multiplyMatStandard(a, b, c, n);
}
else {
//expand and fill with zeros if matrix size is not a power of two
int newSize = getNextPowerOfTwo(n);
vector< vector<double> >
newA(newSize, vector<double>(newSize)),
newB(newSize, vector<double>(newSize)),
newC(newSize, vector<double>(newSize));
if(n==newSize) {
//matrix size is already a power of two
newA = a;
newB = b;
}
else {
fillZeros(newA, newB, a, b, n);
}
//initialize submatrices
int blockSize = newSize/2; //size for a partition matrix
vector<double> block (blockSize);
vector< vector<double> >
/*partitions of newA*/
a11(blockSize, block), a12(blockSize, block),
a21(blockSize, block), a22(blockSize, block),
/*partitions of newB*/
b11(blockSize, block), b12(blockSize, block),
b21(blockSize, block), b22(blockSize, block),
/*partitions of newC*/
c11(blockSize, block), c12(blockSize, block),
c21(blockSize, block), c22(blockSize, block),
/*matrices storing intermediate results*/
aBlock(blockSize, block), bBlock(blockSize, block),
/*set of submatrices derived from partitions*/
m1(blockSize, block), m2(blockSize, block),
m3(blockSize, block), m4(blockSize, block),
m5(blockSize, block), m6(blockSize, block),
m7(blockSize, block);
//partition matrices
for (int i=0; i<blockSize; i++){
for (int j=0; j<blockSize; j++){
a11[i][j] = newA[i][j];
a12[i][j] = newA[i][j+blockSize];
a21[i][j] = newA[i+blockSize][j];
a22[i][j] = newA[i+blockSize][j+blockSize];
b11[i][j] = newB[i][j];
b12[i][j] = newB[i][j+blockSize];
b21[i][j] = newB[i+blockSize][j];
b22[i][j] = newB[i+blockSize][j+blockSize];
}
}
//compute submatrices
//m1 = (a11+a22)(b11+b22)
add(a11, a22, aBlock, blockSize);
add(b11, b22, bBlock, blockSize);
multiplyStrassen(aBlock, bBlock, m1, blockSize);
//m2 = (a21+a22)b11
add(a21, a22, aBlock, blockSize);
multiplyStrassen(aBlock, b11, m2, blockSize);
//m3 = a11(b12-b22)
subtract(b12, b22, bBlock, blockSize);
multiplyStrassen(a11, bBlock, m3, blockSize);
//m4 = a22(b21-b11)
subtract(b21, b11, bBlock, blockSize);
multiplyStrassen(a22, bBlock, m4, blockSize);
//m5 = (a11+a12)b22
add(a11, a12, aBlock, blockSize);
multiplyStrassen(aBlock, b22, m5, blockSize);
//m6 = (a21-a11)(b11+b12)
subtract(a21, a11, aBlock, blockSize);
add(b11, b12, bBlock, blockSize);
multiplyStrassen(aBlock, bBlock, m6, blockSize);
//m7 = (a12-a22)(b12+b22)
subtract(a12, a22, aBlock, blockSize);
add(b12, b22, bBlock, blockSize);
multiplyStrassen(aBlock, bBlock, m7, blockSize);
//calculate result submatrices
//c11 = m1+m4-m5+m7
add(m1, m4, aBlock, blockSize);
subtract(aBlock, m5, bBlock, blockSize);
add(bBlock, m7, c11, blockSize);
//c12 = m3+m5
add(m3, m5, c12, blockSize);
//c21 = m2+m4
add(m2, m4, c12, blockSize);
//c22 = m1-m2+m3+m6
subtract(m1, m2, aBlock, blockSize);
add(aBlock, m3, bBlock, blockSize);
add(bBlock, m6, c22, blockSize);
//calculate final result matrix
for(int i=0; i<blockSize; i++){
for(int j=0; j<blockSize; j++){
newC[i][j] = c11[i][j];
newC[i][blockSize+j] = c12[i][j];
newC[blockSize+i][j] = c21[i][j];
newC[blockSize+i][blockSize+j] = c22[i][j];
}
}
//remove additional values from expanded matrix
for(int i=0; i<n; i++){
for(int j=0; j<n; j++){
c[i][j] = newC[i][j];
}
}
}
}
double calculateMean(vector<double> data, int size)
{
double sum = 0.0, mean = 0.0;
for (int i = 0; i < size; ++i) {
sum += data[i];
}
mean = sum / size;
return mean;
}
int main(int argc, char *argv[])
{
srand(time(0)); //seed for random number generation
// number of sample size considered to evaluate average execution time
int sampleSize = 10;
int matSize = 1000;
double startTime;
double elapsedTime;
double standardMean;
double strassenMean;
//set threshold value if given by user
if(argc>1) {
thresholdSize = atoi(argv[1]);
}
//vectors storing execution time values
vector<double> standardTime(sampleSize);
vector<double> strassenTime(sampleSize);
for (int k = 0; k < sampleSize; k++) {
//initialize vectors for matrices a, b, c: a*b = c
vector< vector<double> >
a(matSize,vector<double>(matSize)),
b(matSize,vector<double>(matSize)),
c(matSize,vector<double>(matSize));
initMat(a,b,matSize);
//standard execution
startTime = time(0);
multiplyMatStandard(a,b,c,matSize);
elapsedTime = time(0) - startTime;
standardTime[k] = elapsedTime;
//multiplication using Strassen's
startTime = 0;
startTime = time(0);
multiplyStrassen(a,b,c,matSize);
elapsedTime = time(0) - startTime;
strassenTime[k] = elapsedTime;
}
cout << "Standard multiplication"<< endl;
standardMean = calculateMean(standardTime, sampleSize);
cout << "Average time taken to execute matrices of size - "
<< matSize << " : " << standardMean << endl;
cout << endl;
cout << "Multiplication using Strassen's"<< endl;
cout << "Threshold used: "<< thresholdSize <<endl;
strassenMean = calculateMean(strassenTime, sampleSize);
cout << "Average time taken to execute matrices of size - "
<< matSize << " : " << strassenMean << endl;
cout << endl;
cout << "Speed up gained by using Strassen's-" << matSize
<< " : " << standardMean/strassenMean << endl;
cout << endl;
return 0;
}Results
Above execution gave a speed increase of 2.
