gnu: combinatorial-blas: Adjust AWPM headers for library use.
Fixes use in latest versions of SuperLU_DIST. See e.g. https://github.com/xiaoyeli/superlu_dist/issues/60 * gnu/packages/patches/combinatorial-blas-awpm.patch: Remove globals related to performance measurement. Declare non-template function inline.
This commit is contained in:
parent
013cf93234
commit
38dd27e866
@ -1,4 +1,6 @@
|
||||
Install BipartiteMatchings headers for SuperLU_DIST.
|
||||
Install BipartiteMatchings headers for SuperLU_DIST. Removes global variables
|
||||
and code related to performance measurement that is not useful when used in a
|
||||
library setting.
|
||||
|
||||
--- a/BipartiteMatchings/ApproxWeightPerfectMatching.h
|
||||
+++ b/BipartiteMatchings/ApproxWeightPerfectMatching.h
|
||||
@ -11,6 +13,167 @@ Install BipartiteMatchings headers for SuperLU_DIST.
|
||||
#include "BPMaximalMatching.h"
|
||||
#include "BPMaximumMatching.h"
|
||||
#include <parallel/algorithm>
|
||||
@@ -39,9 +39,6 @@
|
||||
std::shared_ptr<CommGrid> commGrid;
|
||||
};
|
||||
|
||||
-double t1Comp, t1Comm, t2Comp, t2Comm, t3Comp, t3Comm, t4Comp, t4Comm, t5Comp, t5Comm, tUpdateMateComp;
|
||||
-
|
||||
-
|
||||
template <class IT, class NT>
|
||||
std::vector<std::tuple<IT,IT,NT>> ExchangeData(std::vector<std::vector<std::tuple<IT,IT,NT>>> & tempTuples, MPI_Comm World)
|
||||
{
|
||||
@@ -391,7 +388,7 @@
|
||||
|
||||
|
||||
|
||||
-int ThreadBuffLenForBinning(int itemsize, int nbins)
|
||||
+inline int ThreadBuffLenForBinning(int itemsize, int nbins)
|
||||
{
|
||||
// 1MB shared cache (per 2 cores) in KNL
|
||||
#ifndef L2_CACHE_SIZE
|
||||
@@ -417,7 +414,6 @@
|
||||
|
||||
|
||||
|
||||
- double tstart = MPI_Wtime();
|
||||
|
||||
|
||||
MPI_Comm World = param.commGrid->GetWorld();
|
||||
@@ -528,9 +524,6 @@
|
||||
}
|
||||
}
|
||||
|
||||
- t1Comp = MPI_Wtime() - tstart;
|
||||
- tstart = MPI_Wtime();
|
||||
-
|
||||
// Step 3: Communicate data
|
||||
|
||||
std::vector<int> recvcnt (param.nprocs);
|
||||
@@ -548,7 +541,6 @@
|
||||
std::vector< std::tuple<IT,IT,NT> > recvTuples1(totrecv);
|
||||
MPI_Alltoallv(sendTuples.data(), sendcnt.data(), sdispls.data(), MPI_tuple, recvTuples1.data(), recvcnt.data(), rdispls.data(), MPI_tuple, World);
|
||||
MPI_Type_free(&MPI_tuple);
|
||||
- t1Comm = MPI_Wtime() - tstart;
|
||||
return recvTuples1;
|
||||
}
|
||||
|
||||
@@ -730,9 +722,6 @@
|
||||
|
||||
// Step 4: Communicate data
|
||||
|
||||
- t2Comp = MPI_Wtime() - tstart;
|
||||
- tstart = MPI_Wtime();
|
||||
-
|
||||
std::vector<int> recvcnt (param.nprocs);
|
||||
std::vector<int> rdispls (param.nprocs, 0);
|
||||
|
||||
@@ -748,7 +737,6 @@
|
||||
std::vector< std::tuple<IT,IT,IT,NT> > recvTuples1(totrecv);
|
||||
MPI_Alltoallv(sendTuples.data(), sendcnt.data(), sdispls.data(), MPI_tuple, recvTuples1.data(), recvcnt.data(), rdispls.data(), MPI_tuple, World);
|
||||
MPI_Type_free(&MPI_tuple);
|
||||
- t2Comm = MPI_Wtime() - tstart;
|
||||
return recvTuples1;
|
||||
}
|
||||
|
||||
@@ -836,7 +824,6 @@
|
||||
param.myrank = myrank;
|
||||
param.commGrid = commGrid;
|
||||
|
||||
- double t1CompAll = 0, t1CommAll = 0, t2CompAll = 0, t2CommAll = 0, t3CompAll = 0, t3CommAll = 0, t4CompAll = 0, t4CommAll = 0, t5CompAll = 0, t5CommAll = 0, tUpdateMateCompAll = 0, tUpdateWeightAll = 0;
|
||||
|
||||
// -----------------------------------------------------------
|
||||
// replicate mate vectors for mateCol2Row
|
||||
@@ -975,11 +962,7 @@
|
||||
}
|
||||
|
||||
//vector< tuple<IT,IT,IT, NT> >().swap(recvTuples1);
|
||||
- double t3Comp = MPI_Wtime() - tstart;
|
||||
- tstart = MPI_Wtime();
|
||||
recvTuples1 = ExchangeData1(tempTuples1, World);
|
||||
- double t3Comm = MPI_Wtime() - tstart;
|
||||
- tstart = MPI_Wtime();
|
||||
|
||||
std::vector<std::tuple<IT,IT,IT,IT, NT>> bestTuplesPhase4 (lncol);
|
||||
// we could have used lnrow in both bestTuplesPhase3 and bestTuplesPhase4
|
||||
@@ -1041,14 +1024,9 @@
|
||||
|
||||
|
||||
//vector< tuple<IT,IT,IT, NT> >().swap(recvTuples1);
|
||||
- double t4Comp = MPI_Wtime() - tstart;
|
||||
- tstart = MPI_Wtime();
|
||||
|
||||
std::vector<std::tuple<IT,IT,IT,IT>> recvWinnerTuples = ExchangeData1(winnerTuples, World);
|
||||
|
||||
- double t4Comm = MPI_Wtime() - tstart;
|
||||
- tstart = MPI_Wtime();
|
||||
-
|
||||
// at the owner of (mj,j)
|
||||
std::vector<std::tuple<IT,IT>> rowBcastTuples(recvWinnerTuples.size()); //(mi,mj)
|
||||
std::vector<std::tuple<IT,IT>> colBcastTuples(recvWinnerTuples.size()); //(j,i)
|
||||
@@ -1065,15 +1043,10 @@
|
||||
colBcastTuples[k] = std::make_tuple(j,i);
|
||||
rowBcastTuples[k] = std::make_tuple(mj,mi);
|
||||
}
|
||||
- double t5Comp = MPI_Wtime() - tstart;
|
||||
- tstart = MPI_Wtime();
|
||||
|
||||
std::vector<std::tuple<IT,IT>> updatedR2C = MateBcast(rowBcastTuples, RowWorld);
|
||||
std::vector<std::tuple<IT,IT>> updatedC2R = MateBcast(colBcastTuples, ColWorld);
|
||||
|
||||
- double t5Comm = MPI_Wtime() - tstart;
|
||||
- tstart = MPI_Wtime();
|
||||
-
|
||||
#ifdef THREADED
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
@@ -1095,13 +1068,9 @@
|
||||
}
|
||||
|
||||
|
||||
- double tUpdateMateComp = MPI_Wtime() - tstart;
|
||||
- tstart = MPI_Wtime();
|
||||
// update weights of matched edges
|
||||
// we can do better than this since we are doing sparse updates
|
||||
ReplicateMateWeights(param, dcsc, colptr, RepMateC2R, RepMateWR2C, RepMateWC2R);
|
||||
- double tUpdateWeight = MPI_Wtime() - tstart;
|
||||
-
|
||||
|
||||
weightPrev = weightCur;
|
||||
weightCur = MatchingWeight(RepMateWC2R, RowWorld, minw);
|
||||
@@ -1110,32 +1079,8 @@
|
||||
//UpdateMatching(mateRow2Col, mateCol2Row, RepMateR2C, RepMateC2R);
|
||||
//CheckMatching(mateRow2Col,mateCol2Row);
|
||||
|
||||
- if(myrank==0)
|
||||
- {
|
||||
- std::cout << t1Comp << " " << t1Comm << " "<< t2Comp << " " << t2Comm << " " << t3Comp << " " << t3Comm << " " << t4Comp << " " << t4Comm << " " << t5Comp << " " << t5Comm << " " << tUpdateMateComp << " " << tUpdateWeight << std::endl;
|
||||
-
|
||||
- t1CompAll += t1Comp;
|
||||
- t1CommAll += t1Comm;
|
||||
- t2CompAll += t2Comp;
|
||||
- t2CommAll += t2Comm;
|
||||
- t3CompAll += t3Comp;
|
||||
- t3CommAll += t3Comm;
|
||||
- t4CompAll += t4Comp;
|
||||
- t4CommAll += t4Comm;
|
||||
- t5CompAll += t5Comp;
|
||||
- t5CommAll += t5Comm;
|
||||
- tUpdateMateCompAll += tUpdateMateComp;
|
||||
- tUpdateWeightAll += tUpdateWeight;
|
||||
-
|
||||
- }
|
||||
}
|
||||
|
||||
- if(myrank==0)
|
||||
- {
|
||||
- std::cout << "=========== overal timing ==========" << std::endl;
|
||||
- std::cout << t1CompAll << " " << t1CommAll << " " << t2CompAll << " " << t2CommAll << " " << t3CompAll << " " << t3CommAll << " " << t4CompAll << " " << t4CommAll << " " << t5CompAll << " " << t5CommAll << " " << tUpdateMateCompAll << " " << tUpdateWeightAll << std::endl;
|
||||
- }
|
||||
-
|
||||
// update the distributed mate vectors from replicated mate vectors
|
||||
UpdateMatching(mateRow2Col, mateCol2Row, RepMateR2C, RepMateC2R);
|
||||
//weightCur = MatchingWeight(RepMateWC2R, RowWorld);
|
||||
--- a/BipartiteMatchings/BPMaximalMatching.h
|
||||
+++ b/BipartiteMatchings/BPMaximalMatching.h
|
||||
@@ -1,7 +1,7 @@
|
||||
@ -22,6 +185,33 @@ Install BipartiteMatchings headers for SuperLU_DIST.
|
||||
#include <iostream>
|
||||
#include <functional>
|
||||
#include <algorithm>
|
||||
@@ -14,8 +14,6 @@
|
||||
#define GREEDY 1
|
||||
#define KARP_SIPSER 2
|
||||
#define DMD 3
|
||||
-MTRand GlobalMT(123); // for reproducible result
|
||||
-double tTotalMaximal;
|
||||
|
||||
namespace combblas {
|
||||
|
||||
@@ -25,7 +25,7 @@
|
||||
void MaximalMatching(Par_DCSC_Bool & A, Par_DCSC_Bool & AT, FullyDistVec<IT, IT>& mateRow2Col,
|
||||
FullyDistVec<IT, IT>& mateCol2Row, FullyDistVec<IT, IT>& degColRecv, int type, bool rand=true)
|
||||
{
|
||||
-
|
||||
+ static MTRand GlobalMT(123); // for reproducible result
|
||||
typedef VertexTypeML < IT, IT> VertexType;
|
||||
int nprocs, myrank;
|
||||
MPI_Comm_size(MPI_COMM_WORLD,&nprocs);
|
||||
@@ -354,8 +354,6 @@
|
||||
|
||||
}
|
||||
|
||||
- tTotalMaximal = MPI_Wtime() - tStart;
|
||||
-
|
||||
IT cardinality = mateRow2Col.Count([](IT mate){return mate!=-1;});
|
||||
std::vector<double> totalTimes(timing[0].size(),0);
|
||||
for(int i=0; i<timing.size(); i++)
|
||||
--- a/BipartiteMatchings/BPMaximumMatching.h
|
||||
+++ b/BipartiteMatchings/BPMaximumMatching.h
|
||||
@@ -1,7 +1,7 @@
|
||||
@ -33,6 +223,32 @@ Install BipartiteMatchings headers for SuperLU_DIST.
|
||||
#include <mpi.h>
|
||||
#include <sys/time.h>
|
||||
#include <iostream>
|
||||
@@ -11,7 +11,6 @@
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
#include "MatchingDefs.h"
|
||||
-double tTotalMaximum;
|
||||
|
||||
namespace combblas {
|
||||
|
||||
@@ -231,7 +231,7 @@
|
||||
void maximumMatching(SpParMat < IT, NT, DER > & A, FullyDistVec<IT, IT>& mateRow2Col,
|
||||
FullyDistVec<IT, IT>& mateCol2Row, bool prune=true, bool randMM = false, bool maximizeWeight = false)
|
||||
{
|
||||
-
|
||||
+ static MTRand GlobalMT(123); // for reproducible result
|
||||
typedef VertexTypeMM <IT> VertexType;
|
||||
|
||||
int nthreads=1;
|
||||
@@ -420,8 +420,6 @@
|
||||
|
||||
MPI_Win_free(&winLeaves);
|
||||
|
||||
- tTotalMaximum = MPI_Wtime() - tstart;
|
||||
-
|
||||
//isMaximalmatching(A, mateRow2Col, mateCol2Row, unmatchedRow, unmatchedCol);
|
||||
//isMatching(mateCol2Row, mateRow2Col); //todo there is a better way to check this
|
||||
|
||||
--- a/BipartiteMatchings/MatchingDefs.h
|
||||
+++ b/BipartiteMatchings/MatchingDefs.h
|
||||
@@ -9,7 +9,7 @@
|
||||
|
Loading…
Reference in New Issue
Block a user