dune-istl  2.2.1
repartition.hh
1 #ifndef DUNE_REPARTITION_HH
2 #define DUNE_REPARTITION_HH
3 
4 #include <cassert>
5 #include <map>
6 #include <utility>
7 
8 #if HAVE_PARMETIS
9 #include <parmetis.h>
10 #endif
11 
12 #include <dune/common/timer.hh>
13 #include <dune/common/enumset.hh>
14 #include <dune/common/mpitraits.hh>
15 #include <dune/common/stdstreams.hh>
16 #include <dune/common/parallel/communicator.hh>
17 #include <dune/common/parallel/indexset.hh>
18 #include <dune/common/parallel/indicessyncer.hh>
19 #include <dune/common/parallel/remoteindices.hh>
20 
21 #include <dune/istl/owneroverlapcopy.hh>
22 #include <dune/istl/paamg/graph.hh>
23 
32 namespace Dune
33 {
34 #if HAVE_MPI
35 
48  template<class G, class T1, class T2>
49  void fillIndexSetHoles(const G& graph, Dune::OwnerOverlapCopyCommunication<T1,T2>& oocomm)
50  {
51  typedef typename Dune::OwnerOverlapCopyCommunication<T1,T2>::ParallelIndexSet IndexSet;
52  typedef typename IndexSet::LocalIndex::Attribute Attribute;
53 
54  IndexSet& indexSet = oocomm.indexSet();
55  const typename Dune::OwnerOverlapCopyCommunication<T1,T2>::GlobalLookupIndexSet& lookup = oocomm.globalLookup();
56 
57  // The type of the const vertex iterator.
58  typedef typename G::ConstVertexIterator VertexIterator;
59 
60 
61  std::size_t sum=0, needed = graph.noVertices()-indexSet.size();
62  std::vector<std::size_t> neededall(oocomm.communicator().size(), 0);
63 
64  MPI_Allgather(&needed, 1, MPITraits<std::size_t>::getType() , &(neededall[0]), 1, MPITraits<std::size_t>::getType(), oocomm.communicator());
65  for(int i=0; i<oocomm.communicator().size(); ++i)
66  sum=sum+neededall[i]; // TODO: make this more generic
67 
68  if(sum==0)
69  // Nothing to do
70  return;
71 
72  //Compute Maximum Global Index
73  T1 maxgi=0;
74  typedef typename IndexSet::const_iterator Iterator;
75  Iterator end;
76  end = indexSet.end();
77  for(Iterator it = indexSet.begin(); it != end; ++it)
78  maxgi=std::max(maxgi,it->global());
79 
80  // Process p creates global indices consecutively
81  // starting at maxgi+1+\sum_{i=0}^{p-1} neededall[i].
82  // All created indices are owned by the process
83  maxgi=oocomm.communicator().max(maxgi);
84  ++maxgi; // Start with the next free index.
85 
86  for(int i=0; i<oocomm.communicator().rank(); ++i)
87  maxgi=maxgi+neededall[i]; // TODO: make this more generic
88 
89  // Store the global index information for repairing the remote index information
90  std::map<int,SLList<std::pair<T1,Attribute> > > globalIndices;
91  storeGlobalIndicesOfRemoteIndices(globalIndices, oocomm.remoteIndices(), indexSet);
92  indexSet.beginResize();
93 
94  for(VertexIterator vertex = graph.begin(), vend=graph.end(); vertex != vend; ++vertex){
95  const typename IndexSet::IndexPair* pair=lookup.pair(*vertex);
96  if(pair==0){
97  // No index yet, add new one
98  indexSet.add(maxgi, typename IndexSet::LocalIndex(*vertex, OwnerOverlapCopyAttributeSet::owner, false));
99  ++maxgi;
100  }
101  }
102 
103  indexSet.endResize();
104 
105  repairLocalIndexPointers(globalIndices, oocomm.remoteIndices(), indexSet);
106 
107  oocomm.freeGlobalLookup();
108  oocomm.buildGlobalLookup();
109 #ifdef DEBUG_REPART
110  std::cout<<"Holes are filled!"<<std::endl;
111  std::cout<<oocomm.communicator().rank()<<": "<<oocomm.indexSet()<<std::endl;
112 #endif
113  }
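 // Example of the numbering scheme implemented above: if the maximum global
 // index found so far is 41 and ranks 0, 1 and 2 need 2, 0 and 1 new indices,
 // then rank 0 creates the global indices 42 and 43, rank 1 creates none and
 // rank 2 creates 44, so all newly added owner indices stay globally unique.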
114 
115  namespace
116  {
117 
118  class ParmetisDuneIndexMap
119  {
120  public:
121  template<class Graph, class OOComm>
122  ParmetisDuneIndexMap(const Graph& graph, const OOComm& com);
123  int toParmetis(int i) const
124  {
125  return duneToParmetis[i];
126  }
127  int toLocalParmetis(int i) const
128  {
129  return duneToParmetis[i]-base_;
130  }
131  int operator[](int i) const
132  {
133  return duneToParmetis[i];
134  }
135  int toDune(int i) const
136  {
137  return parmetisToDune[i];
138  }
139  std::vector<int>::size_type numOfOwnVtx() const
140  {
141  return parmetisToDune.size();
142  }
143  int* vtxDist()
144  {
145  return &vtxDist_[0];
146  }
148  private:
149  int base_;
150  std::vector<int> duneToParmetis;
151  std::vector<int> parmetisToDune;
152  // range of vertices for processor i: vtxdist[i] to vtxdist[i+1] (parmetis global)
153  std::vector<int> vtxDist_;
154  };
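 // Example: with owner-vertex counts {4, 3, 5} on ranks 0, 1 and 2, the
 // constructor below yields vtxDist_ = {0, 4, 7, 12}; rank 1 then owns the
 // ParMETIS global vertices 4..6 and its base_ is 4.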
155 
156  template<class G, class OOComm>
157  ParmetisDuneIndexMap::ParmetisDuneIndexMap(const G& graph, const OOComm& oocomm)
158  : duneToParmetis(graph.noVertices(), -1), vtxDist_(oocomm.communicator().size()+1)
159  {
160  int npes=oocomm.communicator().size(), mype=oocomm.communicator().rank();
161 
162  typedef typename OOComm::ParallelIndexSet::const_iterator Iterator;
163  typedef typename OOComm::OwnerSet OwnerSet;
164 
165  int numOfOwnVtx=0;
166  Iterator end = oocomm.indexSet().end();
167  for(Iterator index = oocomm.indexSet().begin(); index != end; ++index) {
168  if (OwnerSet::contains(index->local().attribute())) {
169  numOfOwnVtx++;
170  }
171  }
172  parmetisToDune.resize(numOfOwnVtx);
173  std::vector<int> globalNumOfVtx(npes);
174  // make this number available to all processes
175  MPI_Allgather(&numOfOwnVtx, 1, MPI_INT, &(globalNumOfVtx[0]), 1, MPI_INT, oocomm.communicator());
176 
177  int base=0;
178  vtxDist_[0] = 0;
179  for(int i=0; i<npes; i++) {
180  if (i<mype) {
181  base += globalNumOfVtx[i];
182  }
183  vtxDist_[i+1] = vtxDist_[i] + globalNumOfVtx[i];
184  }
186  base_=base;
187 
188  // The type of the const vertex iterator.
189  typedef typename G::ConstVertexIterator VertexIterator;
190 #ifdef DEBUG_REPART
191  std::cout << oocomm.communicator().rank()<<" vtxDist: ";
192  for(int i=0; i<= npes;++i)
193  std::cout << vtxDist_[i]<<" ";
194  std::cout<<std::endl;
195 #endif
196 
197  // Traverse the graph and assign a new consecutive number/index
198  // starting at "base" to all owner vertices.
199  // The new index is used as the ParMETIS global index and is
200  // stored in the vector "duneToParmetis"
201  VertexIterator vend = graph.end();
202  for(VertexIterator vertex = graph.begin(); vertex != vend; ++vertex) {
203  const typename OOComm::ParallelIndexSet::IndexPair* index=oocomm.globalLookup().pair(*vertex);
204  assert(index);
205  if (OwnerSet::contains(index->local().attribute())) {
206  // assign and count the index
207  parmetisToDune[base-base_]=index->local();
208  duneToParmetis[index->local()] = base++;
209  }
210  }
211 
212  // At this point, every process knows the ParMETIS global index
213  // of its owner vertices. The next step is to get the
214  // ParMETIS global index of the overlap vertices from the
215  // associated processes. To do this, the Dune::Interface class
216  // is used.
217 #ifdef DEBUG_REPART
218  std::cout <<oocomm.communicator().rank()<<": before ";
219  for(std::size_t i=0; i<duneToParmetis.size(); ++i)
220  std::cout<<duneToParmetis[i]<<" ";
221  std::cout<<std::endl;
222 #endif
223  oocomm.copyOwnerToAll(duneToParmetis,duneToParmetis);
224 #ifdef DEBUG_REPART
225  std::cout <<oocomm.communicator().rank()<<": after ";
226  for(std::size_t i=0; i<duneToParmetis.size(); ++i)
227  std::cout<<duneToParmetis[i]<<" ";
228  std::cout<<std::endl;
229 #endif
230  }
231  }
232 
233  class RedistributeInterface
234  : public Interface
235  {
236  void setCommunicator(MPI_Comm comm)
237  {
238  communicator_=comm;
239  }
240  template<class Flags,class IS>
241  void buildSendInterface(const std::vector<int>& toPart, const IS& idxset)
242  {
243  std::map<int,int> sizes;
244 
245  typedef typename IS::const_iterator IIter;
246  for(IIter i=idxset.begin(), end=idxset.end();i!=end; ++i)
247  if(Flags::contains(i->local().attribute()))
248  ++sizes[toPart[i->local()]];
249 
250  // Allocate the necessary space
251  typedef std::map<int,int>::const_iterator MIter;
252  for(MIter i=sizes.begin(), end=sizes.end(); i!=end; ++i)
253  interfaces()[i->first].first.reserve(i->second);
254 
255  //Insert the interface information
256  typedef typename IS::const_iterator IIter;
257  for(IIter i=idxset.begin(), end=idxset.end();i!=end; ++i)
258  if(Flags::contains(i->local().attribute()))
259  interfaces()[toPart[i->local()]].first.add(i->local());
260  }
261 
262  void reserveSpaceForReceiveInterface(int proc, int size)
263  {
264  interfaces()[proc].second.reserve(size);
265  }
266  void addReceiveIndex(int proc, std::size_t idx)
267  {
268  interfaces()[proc].second.add(idx);
269  }
270  template<typename TG>
271  void buildReceiveInterface(std::vector<std::pair<TG,int> >& indices)
272  {
273  typedef typename std::vector<std::pair<TG,int> >::const_iterator VIter;
274  std::size_t i=0;
275  for(VIter idx=indices.begin(); idx!= indices.end(); ++idx){
276  interfaces()[idx->second].second.add(i++);
277  }
278  }
279 
280  ~RedistributeInterface()
281  {
282  }
283 
284  };
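 // Usage sketch for this interface: the send side is derived from the target
 // partition, e.g. for owner indices 0..3 with toPart = {2,2,5,2},
 // buildSendInterface() registers the local indices {0,1,3} for rank 2 and
 // {2} for rank 5. The receive side is filled incrementally via
 // reserveSpaceForReceiveInterface()/addReceiveIndex() while messages arrive.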
285 
286  namespace
287  {
297  template<class GI>
298  void createSendBuf(std::vector<GI>& ownerVec, std::set<GI>& overlapVec, std::set<int>& neighbors, char *sendBuf, int buffersize, MPI_Comm comm) {
299  // Pack owner vertices
300  std::size_t s=ownerVec.size();
301  int pos=0;
302  if(s==0)
303  ownerVec.resize(1); // otherwise would read beyond the memory bound
304  MPI_Pack(&s, 1, MPITraits<std::size_t>::getType(), sendBuf, buffersize, &pos, comm);
305  MPI_Pack(&(ownerVec[0]), s, MPITraits<GI>::getType(), sendBuf, buffersize, &pos, comm);
306  s = overlapVec.size();
307  MPI_Pack(&s, 1, MPITraits<std::size_t>::getType(), sendBuf, buffersize, &pos, comm);
308  typedef typename std::set<GI>::iterator Iter;
309  for(Iter i=overlapVec.begin(), end= overlapVec.end(); i != end; ++i)
310  MPI_Pack(const_cast<GI*>(&(*i)), 1, MPITraits<GI>::getType(), sendBuf, buffersize, &pos, comm);
311 
312  s=neighbors.size();
313  MPI_Pack(&s, 1, MPITraits<std::size_t>::getType(), sendBuf, buffersize, &pos, comm);
314  typedef typename std::set<int>::iterator IIter;
315 
316  for(IIter i=neighbors.begin(), end= neighbors.end(); i != end; ++i)
317  MPI_Pack(const_cast<int*>(&(*i)), 1, MPI_INT, sendBuf, buffersize, &pos, comm);
318  }
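 // The message layout produced above (and expected by saveRecvBuf below) is:
 // [#owner indices, owner global indices..., #overlap indices, overlap global
 // indices..., #neighbour ranks, neighbour ranks...].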
327  template<class GI>
328  void saveRecvBuf(char *recvBuf, int bufferSize, std::vector<std::pair<GI,int> >& ownerVec,
329  std::set<GI>& overlapVec, std::set<int>& neighbors, RedistributeInterface& inf, int from, MPI_Comm comm) {
330  std::size_t size;
331  int pos=0;
332  // unpack owner vertices
333  MPI_Unpack(recvBuf, bufferSize, &pos, &size, 1, MPITraits<std::size_t>::getType(), comm);
334  inf.reserveSpaceForReceiveInterface(from, size);
335  ownerVec.reserve(ownerVec.size()+size);
336  for(;size!=0;--size){
337  GI gi;
338  MPI_Unpack(recvBuf, bufferSize, &pos, &gi, 1, MPITraits<GI>::getType(), comm);
339  ownerVec.push_back(std::make_pair(gi,from));
340  }
341  // unpack overlap vertices
342  MPI_Unpack(recvBuf, bufferSize, &pos, &size, 1, MPITraits<std::size_t>::getType(), comm);
343  typename std::set<GI>::iterator ipos = overlapVec.begin();
344  Dune::dverb << "unpacking "<<size<<" overlap"<<std::endl;
345  for(;size!=0;--size){
346  GI gi;
347  MPI_Unpack(recvBuf, bufferSize, &pos, &gi, 1, MPITraits<GI>::getType(), comm);
348  ipos=overlapVec.insert(ipos, gi);
349  }
350  //unpack neighbors
351  MPI_Unpack(recvBuf, bufferSize, &pos, &size, 1, MPITraits<std::size_t>::getType(), comm);
352  Dune::dverb << "unpacking "<<size<<" neighbors"<<std::endl;
353  typename std::set<int>::iterator npos = neighbors.begin();
354  for(;size!=0;--size){
355  int n;
356  MPI_Unpack(recvBuf, bufferSize, &pos, &n, 1, MPI_INT, comm);
357  npos=neighbors.insert(npos, n);
358  }
359  }
360 
374  template<typename T>
375  void getDomain(const MPI_Comm& comm, T *part, int numOfOwnVtx, int nparts, int *myDomain, std::vector<int> &domainMapping) {
376  int npes, mype;
377  MPI_Comm_size(comm, &npes);
378  MPI_Comm_rank(comm, &mype);
379  MPI_Status status;
380 
381  *myDomain = -1;
382  int i=0;
383  int j=0;
384 
385  std::vector<int> domain(nparts);
386  std::vector<int> assigned(npes);
387  // init
388  for (i=0; i<nparts; i++) {
389  domainMapping[i] = -1;
390  domain[i] = 0;
391  }
392  for (i=0; i<npes; i++) {
393  assigned[i] = 0;
394  }
395  // count the occurrence of each domain
396  for (i=0; i<numOfOwnVtx; i++) {
397  domain[part[i]]++;
398  }
399 
400  int *domainMatrix = new int[npes * nparts];
401  // init
402  for(i=0; i<npes*nparts; i++) {
403  domainMatrix[i]=-1;
404  }
405 
406  // init buffer with the own domain
407  int *buf = new int[nparts];
408  for (i=0; i<nparts; i++) {
409  buf[i] = domain[i];
410  domainMatrix[mype*nparts+i] = domain[i];
411  }
412  int pe=0;
413  int src = (mype-1+npes)%npes;
414  int dest = (mype+1)%npes;
415  // ring communication, we need n-1 communications for n processors
416  for (i=0; i<npes-1; i++) {
417  MPI_Sendrecv_replace(buf, nparts, MPI_INT, dest, 0, src, 0, comm, &status);
418  // pe is the rank whose buffer we have just received
419  pe = ((mype-1-i)+npes)%npes;
420  for(j=0; j<nparts; j++) {
421  // save the values to the domain matrix
422  domainMatrix[pe*nparts+j] = buf[j];
423  }
424  }
425  delete[] buf;
426 
427  // Start the domain calculation.
428  // The process which contains the maximum number of vertices of a
429  // particular domain is selected to choose its favorite domain
430  int maxOccurance = 0;
431  pe = -1;
432  for(i=0; i<nparts; i++) {
433  for(j=0; j<npes; j++) {
434  // process has no domain assigned
435  if (assigned[j]==0) {
436  if (maxOccurance < domainMatrix[j*nparts+i]) {
437  maxOccurance = domainMatrix[j*nparts+i];
438  pe = j;
439  }
440  }
441 
442  }
443  if (pe!=-1) {
444  // process got a domain, ...
445  domainMapping[i] = pe;
446  // ...mark as assigned
447  assigned[pe] = 1;
448  if (pe==mype) {
449  *myDomain = i;
450  }
451  pe = -1;
452  }
453  maxOccurance = 0;
454  }
455 
456  delete[] domainMatrix;
457 
458  }
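 // Example of the assignment above: for npes = nparts = 2 with domainMatrix
 // rows {3, 1} on rank 0 and {2, 4} on rank 1, domain 0 is assigned to rank 0
 // (3 > 2 vertices) and domain 1 to rank 1, i.e. domainMapping = {0, 1}.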
459 
460  struct SortFirst
461  {
462  template<class T>
463  bool operator()(const T& t1, const T& t2) const
464  {
465  return t1<t2;
466  }
467  };
468 
469 
480  template<class GI>
481  void mergeVec(std::vector<std::pair<GI, int> >& ownerVec, std::set<GI>& overlapSet) {
482 
483  typedef typename std::vector<std::pair<GI,int> >::const_iterator VIter;
484 #ifdef DEBUG_REPART
485  // Safety check for duplicates.
486  if(ownerVec.size()>0)
487  {
488  VIter old=ownerVec.begin();
489  for(VIter i=old+1, end=ownerVec.end(); i != end; old=i++)
490  {
491  if(i->first==old->first)
492  {
493  std::cerr<<"Value at index "<<old-ownerVec.begin()<<" is the same as at index "
494  <<i-ownerVec.begin()<<" ["<<old->first<<","<<old->second<<"]==["
495  <<i->first<<","<<i->second<<"]"<<std::endl;
496  throw "Huch!";
497  }
498  }
499  }
500 
501 #endif
502 
503  typedef typename std::set<GI>::iterator SIter;
504  VIter v=ownerVec.begin(), vend=ownerVec.end();
505  for(SIter s=overlapSet.begin(), send=overlapSet.end(); s!=send;)
506  {
507  while(v!=vend && v->first<*s) ++v;
508  if(v!=vend && v->first==*s){
509  // Move to the next element before erasing
510  // thus s stays valid!
511  SIter tmp=s;
512  ++s;
513  overlapSet.erase(tmp);
514  }else
515  ++s;
516  }
517  }
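 // Example: with ownerVec globals {1, 3, 5} and overlapSet {2, 3, 6}, the
 // duplicate 3 is erased from the set and overlapSet becomes {2, 6}.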
518 
519 
533  template<class OwnerSet, class Graph, class IS, class GI>
534  void getNeighbor(const Graph& g, std::vector<int>& part,
535  typename Graph::VertexDescriptor vtx, const IS& indexSet,
536  int toPe, std::set<GI>& neighbor, std::set<int>& neighborProcs) {
537  typedef typename Graph::ConstEdgeIterator Iter;
538  for(Iter edge=g.beginEdges(vtx), end=g.endEdges(vtx); edge!=end; ++edge)
539  {
540  const typename IS::IndexPair* pindex = indexSet.pair(edge.target());
541  assert(pindex);
542  if(part[pindex->local()]!=toPe || !OwnerSet::contains(pindex->local().attribute()))
543  {
544  // is sent to another process and therefore becomes overlap
545  neighbor.insert(pindex->global());
546  neighborProcs.insert(part[pindex->local()]);
547  }
548  }
549  }
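 // Example: if vertex v is assigned to part 3 and has the neighbours a
 // (part 3, owner) and b (part 5, owner), only b's global index is recorded
 // as overlap for part 3 and 5 is recorded as a neighbour process.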
550 
551  template<class T, class I>
552  void my_push_back(std::vector<T>& ownerVec, const I& index, int proc)
553  {
554  ownerVec.push_back(index);
555  }
556 
557  template<class T, class I>
558  void my_push_back(std::vector<std::pair<T,int> >& ownerVec, const I& index, int proc)
559  {
560  ownerVec.push_back(std::make_pair(index,proc));
561  }
562  template<class T>
563  void reserve(std::vector<T>&, RedistributeInterface&, int)
564  {
565  }
566  template<class T>
567  void reserve(std::vector<std::pair<T,int> >& ownerVec, RedistributeInterface& redist, int proc)
568  {
569  redist.reserveSpaceForReceiveInterface(proc, ownerVec.size());
570  }
571 
572 
590  template<class OwnerSet, class G, class IS, class T, class GI>
591  void getOwnerOverlapVec(const G& graph, std::vector<int>& part, IS& indexSet,
592  int myPe, int toPe, std::vector<T>& ownerVec, std::set<GI>& overlapSet,
593  RedistributeInterface& redist, std::set<int>& neighborProcs) {
594 
595  //typedef typename IndexSet::const_iterator Iterator;
596  typedef typename IS::const_iterator Iterator;
597  for(Iterator index = indexSet.begin(); index != indexSet.end(); ++index) {
598  // Only process owner vertices; the others are not in the ParMETIS graph.
599  if(OwnerSet::contains(index->local().attribute()))
600  {
601  if(part[index->local()]==toPe)
602  {
603  getNeighbor<OwnerSet>(graph, part, index->local(), indexSet,
604  toPe, overlapSet, neighborProcs);
605  my_push_back(ownerVec, index->global(), toPe);
606  }
607  }
608  }
609  reserve(ownerVec, redist, toPe);
610 
611  }
612 
613 
620  template<class F, class IS>
621  inline bool isOwner(IS& indexSet, int index) {
622 
623  const typename IS::IndexPair* pindex=indexSet.pair(index);
624 
625  assert(pindex);
626  return F::contains(pindex->local().attribute());
627  }
628 
629 
630  class BaseEdgeFunctor
631  {
632  public:
633  BaseEdgeFunctor(int* adj,const ParmetisDuneIndexMap& data)
634  :i_(), adj_(adj), data_(data)
635  {}
636 
637  template<class T>
638  void operator()(const T& edge)
639  {
640  // Get the edge weight
641  // const Weight& weight=edge.weight();
642  adj_[i_] = data_.toParmetis(edge.target());
643  i_++;
644  }
645  std::size_t index()
646  {
647  return i_;
648  }
649 
650  private:
651  std::size_t i_;
652  int* adj_;
653  const ParmetisDuneIndexMap& data_;
654  };
655 
656  template<typename G>
657  struct EdgeFunctor
658  : public BaseEdgeFunctor
659  {
660  EdgeFunctor(int* adj, const ParmetisDuneIndexMap& data, std::size_t s)
661  : BaseEdgeFunctor(adj, data)
662  {}
663 
664  int* getWeights()
665  {
666  return NULL;
667  }
668  void free(){}
669  };
670 
671  template<class G, class V, class E, class VM, class EM>
672  class EdgeFunctor<Dune::Amg::PropertiesGraph<G,V,E,VM,EM> >
673  : public BaseEdgeFunctor
674  {
675  public:
676  EdgeFunctor(int* adj, const ParmetisDuneIndexMap& data, std::size_t s)
677  :BaseEdgeFunctor(adj, data)
678  {
679  weight_=new int[s];
680  }
681 
682  template<class T>
683  void operator()(const T& edge)
684  {
685  weight_[index()]=edge.properties().depends()?3:1;
686  BaseEdgeFunctor::operator()(edge);
687  }
688  int* getWeights()
689  {
690  return weight_;
691  }
692  void free(){
693  if(weight_!=0){
694  delete[] weight_;
695  weight_=0;
696  }
697  }
698  private:
699  int* weight_;
700  };
701 
702 
703 
717  template<class F, class G, class IS, class EW>
718  void getAdjArrays(G& graph, IS& indexSet, int *xadj,
719  EW& ew)
720  {
721  int j=0;
722 
723  // The type of the const vertex iterator.
724  typedef typename G::ConstVertexIterator VertexIterator;
725  //typedef typename IndexSet::const_iterator Iterator;
726  typedef typename IS::const_iterator Iterator;
727 
728  VertexIterator vend = graph.end();
729  Iterator end;
730 
731  for(VertexIterator vertex = graph.begin(); vertex != vend; ++vertex){
732  if (isOwner<F>(indexSet,*vertex)) {
733  // The type of const edge iterator.
734  typedef typename G::ConstEdgeIterator EdgeIterator;
735  EdgeIterator eend = vertex.end();
736  xadj[j] = ew.index();
737  j++;
738  for(EdgeIterator edge = vertex.begin(); edge != eend; ++edge){
739  ew(edge);
740  }
741  }
742  }
743  xadj[j] = ew.index();
744  }
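 // Example of the CSR arrays built above: if the owner vertices have 2, 3 and
 // 1 outgoing edges, xadj becomes {0, 2, 5, 6} and adjncy holds the six
 // ParMETIS indices of the edge targets in exactly that order.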
745  }// end anonymous namespace
746 
747  template<class G, class T1, class T2>
748  bool buildCommunication(const G& graph, std::vector<int>& realparts,
749  Dune::OwnerOverlapCopyCommunication<T1,T2>& oocomm,
750  Dune::OwnerOverlapCopyCommunication<T1,T2>*& outcomm,
751  RedistributeInterface& redistInf,
752  bool verbose=false);
753 #if HAVE_PARMETIS
754  extern "C"{
755  void METIS_PartGraphKway(int *nvtxs, idxtype *xadj, idxtype *adjncy, idxtype *vwgt,
756  idxtype *adjwgt, int *wgtflag, int *numflag, int *nparts,
757  int *options, int *edgecut, idxtype *part);
758 
759  void METIS_PartGraphRecursive(int *nvtxs, idxtype *xadj, idxtype *adjncy, idxtype *vwgt,
760  idxtype *adjwgt, int *wgtflag, int *numflag, int *nparts,
761  int *options, int *edgecut, idxtype *part);
762  }
763 #endif
764 
765  template<class S, class T>
766  inline void print_carray(S& os, T* array, std::size_t l)
767  {
768  for(T *cur=array, *end=array+l; cur!=end; ++cur)
769  os<<*cur<<" ";
770  }
771 #if !HAVE_PARMETIS
772  typedef std::size_t idxtype;
773 #endif
774 
775  inline bool isValidGraph(std::size_t noVtx, std::size_t gnoVtx, idxtype noEdges, idxtype* xadj,
776  idxtype* adjncy, bool checkSymmetry)
777  {
778  bool correct=true;
779 
780  for(idxtype vtx=0; vtx<(idxtype)noVtx; ++vtx){
781  if(xadj[vtx]>noEdges||xadj[vtx]<0){
782  std::cerr <<"Check graph: xadj["<<vtx<<"]="<<xadj[vtx]<<" (>"
783  <<noEdges<<") out of range!"<<std::endl;
784  correct=false;
785  }
786  if(xadj[vtx+1]>noEdges||xadj[vtx+1]<0){
787  std::cerr <<"Check graph: xadj["<<vtx+1<<"]="<<xadj[vtx+1]<<" (>"
788  <<noEdges<<") out of range!"<<std::endl;
789  correct=false;
790  }
791  // Check numbers in adjncy
792  for(idxtype i=xadj[vtx]; i< xadj[vtx+1];++i){
793  if(adjncy[i]<0||((std::size_t)adjncy[i])>gnoVtx){
794  std::cerr<<" Edge "<<adjncy[i]<<" out of range ["<<0<<","<<gnoVtx<<")"
795  <<std::endl;
796  correct=false;
797  }
798  }
799  if(checkSymmetry){
800  for(idxtype i=xadj[vtx]; i< xadj[vtx+1];++i){
801  idxtype target=adjncy[i];
802  // search for symmetric edge
803  int found=0;
804  for(idxtype j=xadj[target]; j< xadj[target+1];++j)
805  if(adjncy[j]==vtx)
806  found++;
807  if(found!=1){
808  std::cerr<<"Edge ("<<target<<","<<vtx<<") occurs "<<found<<" times instead of once!"<<std::endl;
809  correct=false;
810  }
811  }
812  }
813  }
814  return correct;
815  }
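 // Example: the undirected triangle given by xadj = {0, 2, 4, 6} and
 // adjncy = {1, 2, 0, 2, 0, 1} passes this check, including the symmetry test.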
816 
817  template<class M, class T1, class T2>
818  bool commGraphRepartition(const M& mat, Dune::OwnerOverlapCopyCommunication<T1,T2>& oocomm, int nparts,
819  Dune::OwnerOverlapCopyCommunication<T1,T2>*& outcomm,
820  RedistributeInterface& redistInf,
821  bool verbose=false)
822  {
823  if(verbose && oocomm.communicator().rank()==0)
824  std::cout<<"Repartitioning from "<<oocomm.communicator().size()
825  <<" to "<<nparts<<" parts"<<std::endl;
826  Timer time;
827  int rank = oocomm.communicator().rank();
828 #if !HAVE_PARMETIS
829  int* part = new int[1];
830  part[0]=0;
831 #else
832  idxtype* part = new idxtype[1]; // where all our data moves to
833 
834  if(nparts>1){
835 
836  part[0]=rank;
837 
838  { // sublock for automatic memory deletion
839 
840  // Build the graph of the communication scheme and create an appropriate indexset.
841  // calculate the neighbour vertices
842  int noNeighbours = oocomm.remoteIndices().neighbours();
843  typedef typename Dune::OwnerOverlapCopyCommunication<T1,T2>::RemoteIndices RemoteIndices;
844  typedef typename RemoteIndices::const_iterator
845  NeighbourIterator;
846 
847  for(NeighbourIterator n= oocomm.remoteIndices().begin(); n != oocomm.remoteIndices().end();
848  ++n)
849  if(n->first==rank){
850  //do not include ourselves.
851  --noNeighbours;
852  break;
853  }
854 
855  // A parmetis graph representing the communication graph.
856  // The diagonal entries are the number of nodes on the process.
857  // The offdiagonal entries are the number of edges leading to other processes.
858 
859  idxtype *xadj=new idxtype[2], *vwgt = 0;
860  idxtype *vtxdist=new idxtype[oocomm.communicator().size()+1];
861  idxtype * adjncy=new idxtype[noNeighbours], *adjwgt = 0;
862 
863  // each process has exactly one vertex!
864  for(int i=0; i<oocomm.communicator().size(); ++i)
865  vtxdist[i]=i;
866  vtxdist[oocomm.communicator().size()]=oocomm.communicator().size();
867 
868  xadj[0]=0;
869  xadj[1]=noNeighbours;
870 
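 // Example of this per-process communication graph: on 4 ranks, if rank 1
 // exchanges data with ranks 0 and 2, it contributes the single vertex 1 with
 // xadj = {0, 2} and adjncy = {0, 2}, while vtxdist = {0, 1, 2, 3, 4}.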
871  // count edges to other processor
872  // a vector mapping the index to the owner
873  // std::vector<int> owner(mat.N(), oocomm.communicator().rank());
874  // for(NeighbourIterator n= oocomm.remoteIndices().begin(); n != oocomm.remoteIndices().end();
875  // ++n)
876  // {
877  // if(n->first!=oocomm.communicator().rank()){
878  // typedef typename RemoteIndices::RemoteIndexList RIList;
879  // const RIList& rlist = *(n->second.first);
880  // typedef typename RIList::const_iterator LIter;
881  // for(LIter entry=rlist.begin(); entry!=rlist.end(); ++entry){
882  // if(entry->attribute()==OwnerOverlapCopyAttributeSet::owner)
883  // owner[entry->localIndexPair().local()] = n->first;
884  // }
885  // }
886  // }
887 
888  // std::map<int,idxtype> edgecount; // edges to other processors
889  // typedef typename M::ConstRowIterator RIter;
890  // typedef typename M::ConstColIterator CIter;
891 
892  // // calculate edge count
893  // for(RIter row=mat.begin(), endr=mat.end(); row != endr; ++row)
894  // if(owner[row.index()]==OwnerOverlapCopyAttributeSet::owner)
895  // for(CIter entry= row->begin(), ende = row->end(); entry != ende; ++entry)
896  // ++edgecount[owner[entry.index()]];
897 
898  // setup edge and weight pattern
899  typedef typename RemoteIndices::const_iterator NeighbourIterator;
900  typedef typename Dune::OwnerOverlapCopyCommunication<T1,T2>::ParallelIndexSet IndexSet;
901  typedef typename IndexSet::LocalIndex LocalIndex;
902 
903  idxtype* adjp=adjncy;
904 
905 #ifdef USE_WEIGHTS
906  vwgt = new idxtype[1];
907  vwgt[0]= mat.N(); // weight is the number of rows; TODO: should actually be the number of nonzeros.
908 
909  adjwgt = new idxtype[noNeighbours];
910  idxtype* adjwp=adjwgt;
911 #endif
912 
913  for(NeighbourIterator n= oocomm.remoteIndices().begin(); n != oocomm.remoteIndices().end();
914  ++n)
915  if(n->first != rank){
916  *adjp=n->first;
917  ++adjp;
918 #ifdef USE_WEIGHTS
919  *adjwp=1;//edgecount[n->first];
920  ++adjwp;
921 #endif
922  }
923  assert(isValidGraph(vtxdist[rank+1]-vtxdist[rank],
924  vtxdist[oocomm.communicator().size()],
925  noNeighbours, xadj, adjncy, false));
926 
927  int wgtflag=0, numflag=0, edgecut;
928 #ifdef USE_WEIGHTS
929  wgtflag=3;
930 #endif
931  float *tpwgts = new float[nparts];
932  for(int i=0; i<nparts; ++i)
933  tpwgts[i]=1.0/nparts;
934  int options[5] ={ 0,1,15,0,0};
935  MPI_Comm comm=oocomm.communicator();
936 
937  Dune::dinfo<<rank<<" vtxdist: ";
938  print_carray(Dune::dinfo, vtxdist, oocomm.communicator().size()+1);
939  Dune::dinfo<<std::endl<<rank<<" xadj: ";
940  print_carray(Dune::dinfo, xadj, 2);
941  Dune::dinfo<<std::endl<<rank<<" adjncy: ";
942  print_carray(Dune::dinfo, adjncy, noNeighbours);
943 
944 #ifdef USE_WEIGHTS
945  Dune::dinfo<<std::endl<<rank<<" vwgt: ";
946  print_carray(Dune::dinfo, vwgt, 1);
947  Dune::dinfo<<std::endl<<rank<<" adjwgt: ";
948  print_carray(Dune::dinfo, adjwgt, noNeighbours);
949 #endif
950  Dune::dinfo<<std::endl;
951  oocomm.communicator().barrier();
952  if(verbose && oocomm.communicator().rank()==0)
953  std::cout<<"Creating comm graph took "<<time.elapsed()<<std::endl;
954  time.reset();
955 
956 #ifdef PARALLEL_PARTITION
957  float ubvec = 1.15;
958  int ncon=1;
959 
960  //=======================================================
961  // ParMETIS_V3_PartKway
962  //=======================================================
963  ParMETIS_V3_PartKway(vtxdist, xadj, adjncy,
964  vwgt, adjwgt, &wgtflag,
965  &numflag, &ncon, &nparts, tpwgts, &ubvec, options, &edgecut, part,
966  &comm);
967  if(verbose && oocomm.communicator().rank()==0)
968  std::cout<<"ParMETIS took "<<time.elapsed()<<std::endl;
969  time.reset();
970 #else
971  Timer time1;
972  std::size_t gnoedges=0;
973  int* noedges = 0;
974  noedges = new int[oocomm.communicator().size()];
975  Dune::dverb<<"noNeighbours: "<<noNeighbours<<std::endl;
976  // gather number of edges for each vertex.
977  MPI_Allgather(&noNeighbours,1,MPI_INT,noedges,1, MPI_INT,oocomm.communicator());
978 
979  if(verbose && oocomm.communicator().rank()==0)
980  std::cout<<"Gathering noedges took "<<time1.elapsed()<<std::endl;
981  time1.reset();
982 
983  int noVertices = vtxdist[oocomm.communicator().size()];
984  idxtype *gxadj = 0;
985  idxtype *gvwgt = 0;
986  idxtype *gadjncy = 0;
987  idxtype *gadjwgt = 0;
988  idxtype *gpart = 0;
989  int* displ = 0;
990  int* noxs = 0;
991  int* xdispl = 0; // displacement for xadj
992  int* novs = 0;
993  int* vdispl=0; // real vertex displacement
994  std::size_t localNoVtx=vtxdist[rank+1]-vtxdist[rank];
995  std::size_t gxadjlen = vtxdist[oocomm.communicator().size()]-vtxdist[0]+oocomm.communicator().size();
996 
997  {
998  Dune::dinfo<<"noedges: ";
999  print_carray(Dune::dinfo, noedges, oocomm.communicator().size());
1000  Dune::dinfo<<std::endl;
1001  displ = new int[oocomm.communicator().size()];
1002  xdispl = new int[oocomm.communicator().size()];
1003  noxs = new int[oocomm.communicator().size()];
1004  vdispl = new int[oocomm.communicator().size()];
1005  novs = new int[oocomm.communicator().size()];
1006 
1007  for(int i=0; i < oocomm.communicator().size(); ++i){
1008  noxs[i]=vtxdist[i+1]-vtxdist[i]+1;
1009  novs[i]=vtxdist[i+1]-vtxdist[i];
1010  }
1011 
1012  idxtype *so= vtxdist;
1013  int offset = 0;
1014  for(int *xcurr = xdispl, *vcurr = vdispl, *end=vdispl+oocomm.communicator().size();
1015  vcurr!=end; ++vcurr, ++xcurr, ++so, ++offset){
1016  *vcurr = *so;
1017  *xcurr = offset + *so;
1018  }
1019 
1020  int *pdispl =displ;
1021  int cdispl = 0;
1022  *pdispl = 0;
1023  for(int *curr=noedges, *end=noedges+oocomm.communicator().size()-1;
1024  curr!=end; ++curr){
1025  ++pdispl; // next displacement
1026  cdispl += *curr; // next value
1027  *pdispl = cdispl;
1028  }
1029  Dune::dinfo<<"displ: ";
1030  print_carray(Dune::dinfo, displ, oocomm.communicator().size());
1031  Dune::dinfo<<std::endl;
1032 
1033  // calculate global number of edges
1034  // It is bigger than the actual one as we have size-1 additional end entries
1035  for(int *curr=noedges, *end=noedges+oocomm.communicator().size();
1036  curr!=end; ++curr)
1037  gnoedges += *curr;
1038 
1039  // allocate the global graph
1040  Dune::dinfo<<"gxadjlen: "<<gxadjlen<<" noVertices: "<<noVertices
1041  <<" gnoedges: "<<gnoedges<<std::endl;
1042  gxadj = new idxtype[gxadjlen];
1043  gpart = new idxtype[noVertices];
1044 #ifdef USE_WEIGHTS
1045  gvwgt = new idxtype[noVertices];
1046  gadjwgt = new idxtype[gnoedges];
1047 #endif
1048  gadjncy = new idxtype[gnoedges];
1049  }
1050 
1051  if(verbose && oocomm.communicator().rank()==0)
1052  std::cout<<"Preparing global graph took "<<time1.elapsed()<<std::endl;
1053  time1.reset();
1054  // Communicate data
1055 
1056  MPI_Allgatherv(xadj,2,MPITraits<idxtype>::getType(),
1057  gxadj,noxs,xdispl,MPITraits<idxtype>::getType(),
1058  comm);
1059  MPI_Allgatherv(adjncy,noNeighbours,MPITraits<idxtype>::getType(),
1060  gadjncy,noedges,displ,MPITraits<idxtype>::getType(),
1061  comm);
1062 #ifdef USE_WEIGHTS
1063  MPI_Allgatherv(adjwgt,noNeighbours,MPITraits<idxtype>::getType(),
1064  gadjwgt,noedges,displ,MPITraits<idxtype>::getType(),
1065  comm);
1066  MPI_Allgatherv(vwgt,localNoVtx,MPITraits<idxtype>::getType(),
1067  gvwgt,novs,vdispl,MPITraits<idxtype>::getType(),
1068  comm);
1069 #endif
1070  if(verbose && oocomm.communicator().rank()==0)
1071  std::cout<<"Gathering global graph data took "<<time1.elapsed()<<std::endl;
1072  time1.reset();
1073 
1074  {
1075  // create the real gxadj array
1076  // i.e. shift entries and add displacements.
1077 
1078  print_carray(Dune::dinfo, gxadj, gxadjlen);
1079 
1080  int offset = 0;
1081  idxtype increment = vtxdist[1];
1082  idxtype *start=gxadj+1;
1083  for(int i=1; i<oocomm.communicator().size(); ++i){
1084  offset+=1;
1085  int lprev = vtxdist[i]-vtxdist[i-1];
1086  int l = vtxdist[i+1]-vtxdist[i];
1087  start+=lprev;
1088  assert((start+l+offset)-gxadj<=static_cast<idxtype>(gxadjlen));
1089  increment = *(start-1);
1090  std::transform(start+offset, start+l+offset, start, std::bind2nd(std::plus<idxtype>(), increment));
1091  }
1092  Dune::dinfo<<std::endl<<"shifted xadj:";
1093  print_carray(Dune::dinfo, gxadj, noVertices+1);
1094  Dune::dinfo<<std::endl<<" gadjncy: ";
1095  print_carray(Dune::dinfo, gadjncy, gnoedges);
1096 #ifdef USE_WEIGHTS
1097  Dune::dinfo<<std::endl<<" gvwgt: ";
1098  print_carray(Dune::dinfo, gvwgt, noVertices);
1099  Dune::dinfo<<std::endl<<"adjwgt: ";
1100  print_carray(Dune::dinfo, gadjwgt, gnoedges);
1101  Dune::dinfo<<std::endl;
1102 #endif
1103  // everything should be fine now!!!
1104  if(verbose && oocomm.communicator().rank()==0)
1105  std::cout<<"Postprocessing global graph data took "<<time1.elapsed()<<std::endl;
1106  time1.reset();
1107 #ifndef NDEBUG
1108  assert(isValidGraph(noVertices, noVertices, gnoedges,
1109  gxadj, gadjncy, true));
1110 #endif
1111 
1112  if(verbose && oocomm.communicator().rank()==0)
1113  std::cout<<"Creating graph on 1 process took "<<time.elapsed()<<std::endl;
1114  time.reset();
1115  options[0]=0; options[1]=1; options[2]=1; options[3]=3; options[4]=3;
1116  // Call metis
1117  METIS_PartGraphRecursive(&noVertices, gxadj, gadjncy, gvwgt, gadjwgt, &wgtflag,
1118  &numflag, &nparts, options, &edgecut, gpart);
1119 
1120  if(verbose && oocomm.communicator().rank()==0)
1121  std::cout<<"METIS took "<<time.elapsed()<<std::endl;
1122  time.reset();
1123 
1124  Dune::dinfo<<std::endl<<"part:";
1125  print_carray(Dune::dinfo, gpart, noVertices);
1126 
1127  delete[] gxadj;
1128  delete[] gadjncy;
1129 #ifdef USE_WEIGHTS
1130  delete[] gvwgt;
1131  delete[] gadjwgt;
1132 #endif
1133  }
1134  // Scatter result
1135  MPI_Scatter(gpart, 1, MPITraits<idxtype>::getType(), part, 1,
1136  MPITraits<idxtype>::getType(), 0, comm);
1137 
1138  {
1139  // release remaining memory
1140  delete[] gpart;
1141  delete[] noedges;
1142  delete[] displ;
1143  }
1144 
1145 
1146 #endif
1147  delete[] xadj;
1148  delete[] vtxdist;
1149  delete[] adjncy;
1150 #ifdef USE_WEIGHTS
1151  delete[] vwgt;
1152  delete[] adjwgt;
1153 #endif
1154  delete[] tpwgts;
1155  }
1156  }else{
1157  part[0]=0;
1158  }
1159 #endif
1160  Dune::dinfo<<" repart "<<rank <<" -> "<< part[0]<<std::endl;
1161 
1162  std::vector<int> realpart(mat.N(), part[0]);
1163  delete[] part;
1164 
1165  oocomm.copyOwnerToAll(realpart, realpart);
1166 
1167  if(verbose && oocomm.communicator().rank()==0)
1168  std::cout<<"Scattering repartitioning took "<<time.elapsed()<<std::endl;
1169  time.reset();
1170 
1171 
1172  oocomm.buildGlobalLookup(mat.N());
1173  Dune::Amg::MatrixGraph<M> graph(const_cast<M&>(mat));
1174  fillIndexSetHoles(graph, oocomm);
1175  if(verbose && oocomm.communicator().rank()==0)
1176  std::cout<<"Filling index set took "<<time.elapsed()<<std::endl;
1177  time.reset();
1178 
1179  if(verbose){
1180  int noNeighbours=oocomm.remoteIndices().neighbours();
1181  noNeighbours = oocomm.communicator().sum(noNeighbours)
1182  / oocomm.communicator().size();
1183  if(oocomm.communicator().rank()==0)
1184  std::cout<<"Average no neighbours was "<<noNeighbours<<std::endl;
1185  }
1186  bool ret = buildCommunication(graph, realpart, oocomm, outcomm, redistInf,
1187  verbose);
1188  if(verbose && oocomm.communicator().rank()==0)
1189  std::cout<<"Building index sets took "<<time.elapsed()<<std::endl;
1190  time.reset();
1191 
1192 
1193  return ret;
1194 
1195  }
1196 
1211  template<class G, class T1, class T2>
1212  bool graphRepartition(const G& graph, Dune::OwnerOverlapCopyCommunication<T1,T2>& oocomm, int nparts,
1213  Dune::OwnerOverlapCopyCommunication<T1,T2>*& outcomm,
1214  RedistributeInterface& redistInf,
1215  bool verbose=false)
1216  {
1217  Timer time;
1218 
1219  MPI_Comm comm=oocomm.communicator();
1220  oocomm.buildGlobalLookup(graph.noVertices());
1221  fillIndexSetHoles(graph, oocomm);
1222 
1223  if(verbose && oocomm.communicator().rank()==0)
1224  std::cout<<"Filling holes took "<<time.elapsed()<<std::endl;
1225  time.reset();
1226 
1227  // simple precondition checks
1228 
1229 #ifdef PERF_REPART
1230  // Profiling variables
1231  double t1=0.0, t2=0.0, t3=0.0, t4=0.0, tSum=0.0;
1232 #endif
1233 
1234 
1235  // MPI variables
1236  int mype = oocomm.communicator().rank();
1237 
1238  assert(nparts<=oocomm.communicator().size());
1239 
1240  int myDomain;
1241 
1242  //
1243  // 1) Prepare the required parameters for using ParMETIS
1244  // In particular the arrays that represent the graph must be
1245  // generated from the DUNE Graph and IndexSet input variables.
1246  // These are the arrays:
1247  // - vtxdist
1248  // - xadj
1249  // - adjncy
1250  //
1251  //
1252 #ifdef PERF_REPART
1253  // reset timer for step 1)
1254  t1=MPI_Wtime();
1255 #endif
1256 
1257 
1258  typedef typename Dune::OwnerOverlapCopyCommunication<T1,T2> OOComm;
1259  typedef typename OOComm::OwnerSet OwnerSet;
1260 
1261  // Create the vtxdist array and parmetisVtxMapping.
1262  // Global communications are necessary
1263  // The parmetis global identifiers for the owner vertices.
1264  ParmetisDuneIndexMap indexMap(graph,oocomm);
1265 #if HAVE_PARMETIS
1266  idxtype *part = new idxtype[indexMap.numOfOwnVtx()];
1267 #else
1268  std::size_t *part = new std::size_t[indexMap.numOfOwnVtx()];
1269 #endif
1270  for(std::size_t i=0; i < indexMap.numOfOwnVtx(); ++i)
1271  part[i]=mype;
1272 
1273 #if !HAVE_PARMETIS
1274  if(oocomm.communicator().rank()==0 && nparts>1)
1275  std::cerr<<"ParMETIS not activated. Will repartition to 1 domain instead of requested "
1276  <<nparts<<" domains."<<std::endl;
1277  nparts=1; // No parmetis available, fallback to agglomerating to 1 process
1278 
1279 #else
1280 
1281  if(nparts>1){
1282  // Create the xadj and adjncy arrays
1283  idxtype *xadj = new idxtype[indexMap.numOfOwnVtx()+1];
1284  idxtype *adjncy = new idxtype[graph.noEdges()];
1285  EdgeFunctor<G> ef(adjncy, indexMap, graph.noEdges());
1286  getAdjArrays<OwnerSet>(graph, oocomm.globalLookup(), xadj, ef);
1287 
1288  //
1289  // 2) Call ParMETIS
1290  //
1291  //
1292  int numflag=0, wgtflag=0, options[3], edgecut=0, ncon=1;
1293  //float *tpwgts = NULL;
1294  float *tpwgts = new float[nparts];
1295  for(int i=0; i<nparts; ++i)
1296  tpwgts[i]=1.0/nparts;
1297  float ubvec[1];
1298  options[0] = 0; // 0=default, 1=options are defined in [1]+[2]
1299 #ifdef DEBUG_REPART
1300  options[1] = 3; // show info: 0=no message
1301 #else
1302  options[1] = 0; // show info: 0=no message
1303 #endif
1304  options[2] = 1; // random number seed, default is 15
1305  wgtflag = (ef.getWeights()!=NULL)?1:0;
1306  numflag = 0;
1307  edgecut = 0;
1308  ncon=1;
1309  ubvec[0]=1.05; // recommended by ParMETIS
1310 
1311 #ifdef DEBUG_REPART
1312  if (mype == 0) {
1313  std::cout<<std::endl;
1314  std::cout<<"Testing ParMETIS_V3_PartKway with options[1-2] = {"
1315  <<options[1]<<" "<<options[2]<<"}, Ncon: "
1316  <<ncon<<", Nparts: "<<nparts<<std::endl;
1317  }
1318 #endif
1319 #ifdef PERF_REPART
1320  // stop the time for step 1)
1321  t1=MPI_Wtime()-t1;
1322  // reset timer for step 2)
1323  t2=MPI_Wtime();
1324 #endif
1325 
1326  if(verbose){
1327  oocomm.communicator().barrier();
1328  if(oocomm.communicator().rank()==0)
1329  std::cout<<"Preparing for parmetis took "<<time.elapsed()<<std::endl;
1330  }
1331  time.reset();
1332 
1333  //=======================================================
1334  // ParMETIS_V3_PartKway
1335  //=======================================================
1336  ParMETIS_V3_PartKway(indexMap.vtxDist(), xadj, adjncy,
1337  NULL, ef.getWeights(), &wgtflag,
1338  &numflag, &ncon, &nparts, tpwgts, ubvec, options, &edgecut, part, &const_cast<MPI_Comm&>(comm));
1339 
1340 
1341  delete[] xadj;
1342  delete[] adjncy;
1343  delete[] tpwgts;
1344 
1345  ef.free();
1346 
1347 #ifdef DEBUG_REPART
1348  if (mype == 0) {
1349  std::cout<<std::endl;
1350  std::cout<<"ParMETIS_V3_PartKway reported a cut of "<<edgecut<<std::endl;
1351  std::cout<<std::endl;
1352  }
1353  std::cout<<mype<<": PARMETIS-Result: ";
1354  for(int i=0; i < indexMap.vtxDist()[mype+1]-indexMap.vtxDist()[mype]; ++i) {
1355  std::cout<<part[i]<<" ";
1356  }
1357  std::cout<<std::endl;
1358  std::cout<<"Testing ParMETIS_V3_PartKway with options[1-2] = {"
1359  <<options[1]<<" "<<options[2]<<"}, Ncon: "
1360  <<ncon<<", Nparts: "<<nparts<<std::endl;
1361 #endif
1362 #ifdef PERF_REPART
1363  // stop the time for step 2)
1364  t2=MPI_Wtime()-t2;
1365  // reset timer for step 3)
1366  t3=MPI_Wtime();
1367 #endif
1368 
1369 
1370  if(verbose){
1371  oocomm.communicator().barrier();
1372  if(oocomm.communicator().rank()==0)
1373  std::cout<<"Parmetis took "<<time.elapsed()<<std::endl;
1374  }
1375  time.reset();
1376  }else
1377 #endif
1378  {
1379  // Everything goes to process 0!
1380  for(std::size_t i=0; i<indexMap.numOfOwnVtx();++i)
1381  part[i]=0;
1382  }
1383 
1384 
1385  //
1386  // 3) Find an optimal domain based on the ParMETIS repartitioning
1387  // result
1388  //
1389 
1390  std::vector<int> domainMapping(nparts);
1391  if(nparts>1)
1392  getDomain(comm, part, indexMap.numOfOwnVtx(), nparts, &myDomain, domainMapping);
1393  else
1394  domainMapping[0]=0;
1395 
1396 #ifdef DEBUG_REPART
1397  std::cout<<mype<<": myDomain: "<<myDomain<<std::endl;
1398  std::cout<<mype<<": DomainMapping: ";
1399  for(int j=0; j<nparts; j++) {
1400  std::cout<<" do: "<<j<<" pe: "<<domainMapping[j]<<" ";
1401  }
1402  std::cout<<std::endl;
1403 #endif
1404 
1405  // Make a domain mapping for the index set and translate
1406  // domain numbers into real process numbers.
1407  // domainMapping is the ParMETIS one, i.e. without
1408  // the overlap/copy vertices.
1409  std::vector<int> setPartition(oocomm.indexSet().size(), -1);
1410 
1411  typedef typename OOComm::ParallelIndexSet::const_iterator Iterator;
1412  std::size_t i=0; // parmetis index
1413  for(Iterator index = oocomm.indexSet().begin(); index != oocomm.indexSet().end(); ++index)
1414  if(OwnerSet::contains(index->local().attribute())){
1415  setPartition[index->local()]=domainMapping[part[i++]];
1416  }
1417 
1418  delete[] part;
1419  oocomm.copyOwnerToAll(setPartition, setPartition);
1420  // communication only needed for ALU
1421  // (ghosts with same global id as owners on the same process)
1422  if (oocomm.getSolverCategory() ==
1423  static_cast<int>(SolverCategory::nonoverlapping))
1424  oocomm.copyCopyToAll(setPartition, setPartition);
1425  bool ret = buildCommunication(graph, setPartition, oocomm, outcomm, redistInf,
1426  verbose);
1427  if(verbose){
1428  oocomm.communicator().barrier();
1429  if(oocomm.communicator().rank()==0)
1430  std::cout<<"Creating indexsets took "<<time.elapsed()<<std::endl;
1431  }
1432  return ret;
1433  }
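 // Rough usage sketch (the matrix type, attribute setup and the variables
 // mat and nparts are application dependent and only assumed here):
 //
 //   typedef Dune::OwnerOverlapCopyCommunication<int,int> Comm;
 //   Comm comm(MPI_COMM_WORLD);              // index set filled by the application
 //   Dune::Amg::MatrixGraph<BCRSMat> graph(mat);
 //   Comm* newComm = 0;
 //   Dune::RedistributeInterface redistInf;
 //   bool active = graphRepartition(graph, comm, nparts, newComm, redistInf, true);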
1434 
1435 
1436 
1437  template<class G, class T1, class T2>
1438  bool buildCommunication(const G& graph,
1439  std::vector<int>& setPartition, Dune::OwnerOverlapCopyCommunication<T1,T2>& oocomm,
1440  Dune::OwnerOverlapCopyCommunication<T1,T2>*& outcomm,
1441  RedistributeInterface& redistInf,
1442  bool verbose)
1443  {
1444  typedef typename Dune::OwnerOverlapCopyCommunication<T1,T2> OOComm;
1445  typedef typename OOComm::OwnerSet OwnerSet;
1446 
1447  Timer time;
1448 
1449  // Build the send interface
1450  redistInf.buildSendInterface<OwnerSet>(setPartition, oocomm.indexSet());
1451 
1452 #ifdef PERF_REPART
1453  // stop the time for step 3)
1454  t3=MPI_Wtime()-t3;
1455  // reset timer for step 4)
1456  t4=MPI_Wtime();
1457 #endif
1458 
1459 
1460  //
1461  // 4) Create the output IndexSet and RemoteIndices
1462  // 4.1) Determine the "send to" and "receive from" relation
1463  // according to the new partition using a MPI ring
1464  // communication.
1465  //
1466  // 4.2) Depends on the "send to" and "receive from" vector,
1467  // the processes will exchange the vertices each other
1468  //
1469  // 4.3) Create the IndexSet, RemoteIndices and the new MPI
1470  // communicator
1471  //
1472 
1473  //
1474  // 4.1) Let's start...
1475  //
1476  int npes = oocomm.communicator().size();
1477  int *sendTo = 0;
1478  int noSendTo = 0;
1479  std::set<int> recvFrom;
1480 
1481  // the max number of vertices is stored in the sendTo buffer,
1482  // not the number of vertices to send! Because the max number of Vtx
1483  // is used as the fixed buffer size by the MPI send/receive calls
1484 
1485  typedef typename std::vector<int>::const_iterator VIter;
1486  int mype = oocomm.communicator().rank();
1487 
1488  {
1489  std::set<int> tsendTo;
1490  for(VIter i=setPartition.begin(), iend = setPartition.end(); i!=iend; ++i)
1491  tsendTo.insert(*i);
1492 
1493  noSendTo = tsendTo.size();
1494  sendTo = new int[noSendTo];
1495  typedef std::set<int>::const_iterator iterator;
1496  int idx=0;
1497  for(iterator i=tsendTo.begin(); i != tsendTo.end(); ++i, ++idx)
1498  sendTo[idx]=*i;
1499  }
1500 
1501  //
1502  int* gnoSend= new int[oocomm.communicator().size()];
1503  int* gsendToDispl = new int[oocomm.communicator().size()+1];
1504 
1505  MPI_Allgather(&noSendTo, 1, MPI_INT, gnoSend, 1,
1506  MPI_INT, oocomm.communicator());
1507 
1508  // calculate total receive message size
1509  int totalNoRecv = 0;
1510  for(int i=0; i<npes; ++i)
1511  totalNoRecv += gnoSend[i];
1512 
1513  int *gsendTo = new int[totalNoRecv];
1514 
1515  // calculate displacement for allgatherv
1516  gsendToDispl[0]=0;
1517  for(int i=0; i<npes; ++i)
1518  gsendToDispl[i+1]=gsendToDispl[i]+gnoSend[i];
1519 
1520  // gather the data
1521  MPI_Allgatherv(sendTo, noSendTo, MPI_INT, gsendTo, gnoSend, gsendToDispl,
1522  MPI_INT, oocomm.communicator());
1523 
1524  // Extract from which processes we will receive data
1525  for(int proc=0; proc < npes; ++proc)
1526  for(int i=gsendToDispl[proc]; i < gsendToDispl[proc+1]; ++i)
1527  if(gsendTo[i]==mype)
1528  recvFrom.insert(proc);
1529 
1530  bool existentOnNextLevel = recvFrom.size()>0;
1531 
1532  // Delete memory
1533  delete[] gnoSend;
1534  delete[] gsendToDispl;
1535  delete[] gsendTo;
1536 
1537 
1538 #ifdef DEBUG_REPART
1539  if(recvFrom.size()){
1540  std::cout<<mype<<": recvFrom: ";
1541  typedef typename std::set<int>::const_iterator siter;
1542  for(siter i=recvFrom.begin(); i!= recvFrom.end(); ++i) {
1543  std::cout<<*i<<" ";
1544  }
1545  }
1546 
1547  std::cout<<std::endl<<std::endl;
1548  std::cout<<mype<<": sendTo: ";
1549  for(int i=0; i<noSendTo; i++) {
1550  std::cout<<sendTo[i]<<" ";
1551  }
1552  std::cout<<std::endl<<std::endl;
1553 #endif
1554 
1555  if(verbose)
1556  if(oocomm.communicator().rank()==0)
1557  std::cout<<" Communicating the receive information took "<<
1558  time.elapsed()<<std::endl;
1559  time.reset();
1560 
1561  //
1562  // 4.2) Start the communication
1563  //
1564 
1565  // Get all the owner and overlap vertices for myself and save
1566  // them in the vectors myOwnerVec and myOverlapVec.
1567  // The received vertices from the other processes are simply
1568  // added to these vectors.
1569  //
1570 
1571 
1572  typedef typename OOComm::ParallelIndexSet::GlobalIndex GI;
1573  typedef std::vector<GI> GlobalVector;
1574  std::vector<std::pair<GI,int> > myOwnerVec;
1575  std::set<GI> myOverlapSet;
1576  GlobalVector sendOwnerVec;
1577  std::set<GI> sendOverlapSet;
1578  std::set<int> myNeighbors;
1579 
1580  // getOwnerOverlapVec<OwnerSet>(graph, setPartition, oocomm.globalLookup(),
1581  // mype, mype, myOwnerVec, myOverlapSet, redistInf, myNeighbors);
1582 
1583  char **sendBuffers=new char*[noSendTo];
1584  MPI_Request *requests = new MPI_Request[noSendTo];
1585 
1586  // Create all messages to be sent
1587  for(int i=0; i < noSendTo; ++i){
1588  // clear the vector for sending
1589  sendOwnerVec.clear();
1590  sendOverlapSet.clear();
1591  // get all owner and overlap vertices for process j and save these
1592  // in the vectors sendOwnerVec and sendOverlapSet
1593  std::set<int> neighbors;
1594  getOwnerOverlapVec<OwnerSet>(graph, setPartition, oocomm.globalLookup(),
1595  mype, sendTo[i], sendOwnerVec, sendOverlapSet, redistInf,
1596  neighbors);
1597  // +2, we need 2 more integers for the length of each part
1598  // (owner/overlap) of the array
1599  int buffersize=0;
1600  int tsize;
1601  MPI_Pack_size(1, MPITraits<std::size_t>::getType(), oocomm.communicator(), &buffersize);
1602  MPI_Pack_size(sendOwnerVec.size(), MPITraits<GI>::getType(), oocomm.communicator(), &tsize);
1603  buffersize +=tsize;
1604  MPI_Pack_size(1, MPITraits<std::size_t>::getType(), oocomm.communicator(), &tsize);
1605  buffersize +=tsize;
1606  MPI_Pack_size(sendOverlapSet.size(), MPITraits<GI>::getType(), oocomm.communicator(), &tsize);
1607  buffersize += tsize;
1608  MPI_Pack_size(1, MPITraits<std::size_t>::getType(), oocomm.communicator(), &tsize);
1609  buffersize += tsize;
1610  MPI_Pack_size(neighbors.size(), MPI_INT, oocomm.communicator(), &tsize);
1611  buffersize += tsize;
1612 
1613  sendBuffers[i] = new char[buffersize];
1614 
1615 #ifdef DEBUG_REPART
1616  std::cout<<mype<<" sending "<<sendOwnerVec.size()<<" owner and "<<
1617  sendOverlapSet.size()<<" overlap to "<<sendTo[i]<<" buffersize="<<buffersize<<std::endl;
1618 #endif
1619  createSendBuf(sendOwnerVec, sendOverlapSet, neighbors, sendBuffers[i], buffersize, oocomm.communicator());
1620  MPI_Issend(sendBuffers[i], buffersize, MPI_PACKED, sendTo[i], 99, oocomm.communicator(), requests+i);
1621  }
1622 
1623  if(verbose){
1624  oocomm.communicator().barrier();
1625  if(oocomm.communicator().rank()==0)
1626  std::cout<<" Creating sends took "<<
1627  time.elapsed()<<std::endl;
1628  }
1629  time.reset();
1630 
1631  // Receive Messages
1632  int noRecv = recvFrom.size();
1633  int oldbuffersize=0;
1634  char* recvBuf = 0;
1635  while(noRecv>0){
1636  // probe for an incoming message
1637  MPI_Status stat;
1638  MPI_Probe(MPI_ANY_SOURCE, 99, oocomm.communicator(), &stat);
1639  int buffersize;
1640  MPI_Get_count(&stat, MPI_PACKED, &buffersize);
1641 
1642  if(oldbuffersize<buffersize){
1643  // buffer too small, reallocate
1644  delete[] recvBuf;
1645  recvBuf = new char[buffersize];
1646  oldbuffersize = buffersize;
1647  }
1648  MPI_Recv(recvBuf, buffersize, MPI_PACKED, stat.MPI_SOURCE, 99, oocomm.communicator(), &stat);
1649  saveRecvBuf(recvBuf, buffersize, myOwnerVec, myOverlapSet, myNeighbors, redistInf,
1650  stat.MPI_SOURCE, oocomm.communicator());
1651  --noRecv;
1652  }
1653 
1654  if(recvBuf)
1655  delete[] recvBuf;
1656 
1657  time.reset();
1658  // Wait for sending messages to complete
1659  MPI_Status *statuses = new MPI_Status[noSendTo];
1660  int send = MPI_Waitall(noSendTo, requests, statuses);
1661 
1662  // check for errors
1663  if(send==MPI_ERR_IN_STATUS){
1664  std::cerr<<mype<<": Error in sending :"<<std::endl;
1665  // Search for the error
1666  for(int i=0; i< noSendTo; i++)
1667  if(statuses[i].MPI_ERROR!=MPI_SUCCESS){
1668  char message[300];
1669  int messageLength;
1670  MPI_Error_string(statuses[i].MPI_ERROR, message, &messageLength);
1671  std::cerr<<" source="<<statuses[i].MPI_SOURCE<<" message: ";
1672  for(int i=0; i< messageLength; i++)
1673  std::cout<<message[i];
1674  }
1675  std::cerr<<std::endl;
1676  }
1677 
1678  if(verbose){
1679  oocomm.communicator().barrier();
1680  if(oocomm.communicator().rank()==0)
1681  std::cout<<" Receiving and saving took "<<
1682  time.elapsed()<<std::endl;
1683  }
1684  time.reset();
1685 
1686  for(int i=0; i < noSendTo; ++i)
1687  delete[] sendBuffers[i];
1688 
1689  delete[] sendBuffers;
1690  delete[] statuses;
1691  delete[] requests;
1692 
1693  redistInf.setCommunicator(oocomm.communicator());
1694 
1695  //
1696  // 4.3) Create the IndexSet etc.
1697  //
1698 
1699  // build the new outputIndexSet
1700 
1701 
1702  int color=0;
1703 
1704  if (!existentOnNextLevel) {
1705  // this process is not used anymore
1706  color= MPI_UNDEFINED;
1707  }
1708  MPI_Comm outputComm;
1709 
1710  MPI_Comm_split(oocomm.communicator(), color, oocomm.communicator().rank(), &outputComm);
1711  outcomm = new OOComm(outputComm,oocomm.getSolverCategory(),true);
1712 
1713  // translate neighbor ranks.
1714  int newrank=outcomm->communicator().rank();
1715  int *newranks=new int[oocomm.communicator().size()];
1716  std::vector<int> tneighbors;
1717  tneighbors.reserve(myNeighbors.size());
1718 
1719  typename OOComm::ParallelIndexSet& outputIndexSet = outcomm->indexSet();
1720 
1721  MPI_Allgather(&newrank, 1, MPI_INT, newranks, 1,
1722  MPI_INT, oocomm.communicator());
1723  typedef typename std::set<int>::const_iterator IIter;
1724 
1725 #ifdef DEBUG_REPART
1726  std::cout<<oocomm.communicator().rank()<<" ";
1727  for(IIter i=myNeighbors.begin(), end=myNeighbors.end();
1728  i!=end; ++i){
1729  assert(newranks[*i]>=0);
1730  std::cout<<*i<<"->"<<newranks[*i]<<" ";
1731  tneighbors.push_back(newranks[*i]);
1732  }
1733  std::cout<<std::endl;
1734 #else
1735  for(IIter i=myNeighbors.begin(), end=myNeighbors.end();
1736  i!=end; ++i){
1737  tneighbors.push_back(newranks[*i]);
1738  }
1739 #endif
1740  delete[] newranks;
1741  myNeighbors.clear();
1742 
1743  if(verbose){
1744  oocomm.communicator().barrier();
1745  if(oocomm.communicator().rank()==0)
1746  std::cout<<" Calculating new neighbours ("<<tneighbors.size()<<") took "<<
1747  time.elapsed()<<std::endl;
1748  }
1749  time.reset();
1750 
1751 
1752  outputIndexSet.beginResize();
1753  // 1) add the owner vertices
1754  // Sort the owners
1755  std::sort(myOwnerVec.begin(), myOwnerVec.end(), SortFirst());
1756  // The owners are sorted according to their global index.
1757  // Therefore the entries of ownerVec are the same as the
1758  // ones in the resulting index set.
1759  typedef typename OOComm::ParallelIndexSet::LocalIndex LocalIndex;
1760  typedef typename std::vector<std::pair<GI,int> >::const_iterator VPIter;
1761  int i=0;
1762  for(VPIter g=myOwnerVec.begin(), end =myOwnerVec.end(); g!=end; ++g, ++i ) {
1763  outputIndexSet.add(g->first,LocalIndex(i, OwnerOverlapCopyAttributeSet::owner, true));
1764  redistInf.addReceiveIndex(g->second, i);
1765  }
1766 
1767  if(verbose){
1768  oocomm.communicator().barrier();
1769  if(oocomm.communicator().rank()==0)
1770  std::cout<<" Adding owner indices took "<<
1771  time.elapsed()<<std::endl;
1772  }
1773  time.reset();
1774 
1775 
1776  // After all the vertices are received, the vectors must
1777  // be "merged" together to create the final vectors.
1778  // Because some vertices that were sent as overlap could now
1779  // already be included as owner vertices in the new partition.
1780  mergeVec(myOwnerVec, myOverlapSet);
1781 
1782  // Trick to free the memory: swap with an empty temporary vector
1783  myOwnerVec.clear();
1784  std::vector<std::pair<GI,int> >().swap(myOwnerVec);
1785 
1786  if(verbose){
1787  oocomm.communicator().barrier();
1788  if(oocomm.communicator().rank()==0)
1789  std::cout<<" Merging indices took "<<
1790  time.elapsed()<<std::endl;
1791  }
1792  time.reset();
1793 
1794 
1795  // 2) add the overlap vertices
1796  typedef typename std::set<GI>::const_iterator SIter;
1797  for(SIter g=myOverlapSet.begin(), end=myOverlapSet.end(); g!=end; ++g, i++) {
1798  outputIndexSet.add(*g,LocalIndex(i, OwnerOverlapCopyAttributeSet::copy, true));
1799  }
1800  myOverlapSet.clear();
1801  outputIndexSet.endResize();
1802 
1803 #ifdef DUNE_ISTL_WITH_CHECKING
1804  int numOfOwnVtx =0;
1805  typedef typename OOComm::ParallelIndexSet::const_iterator Iterator;
1806  Iterator end = outputIndexSet.end();
1807  for(Iterator index = outputIndexSet.begin(); index != end; ++index) {
1808  if (OwnerSet::contains(index->local().attribute())) {
1809  numOfOwnVtx++;
1810  }
1811  }
1812  numOfOwnVtx = oocomm.communicator().sum(numOfOwnVtx);
1813  // if(numOfOwnVtx!=indexMap.globalOwnerVertices)
1814  // {
1815  // std::cerr<<numOfOwnVtx<<"!="<<indexMap.globalOwnerVertices<<" owners missing or additional ones!"<<std::endl;
1816  // DUNE_THROW(ISTLError, numOfOwnVtx<<"!="<<indexMap.globalOwnerVertices<<" owners missing or additional ones"
1817  // <<" during repartitioning.");
1818  // }
1819  Iterator index=outputIndexSet.begin();
1820  if(index!=end){
1821  ++index;
1822  for(Iterator old = outputIndexSet.begin(); index != end; old=index++) {
1823  if(old->global()>index->global())
1824  DUNE_THROW(ISTLError, "Index set's globalindex not sorted correctly");
1825  }
1826  }
1827 #endif
1828  if(verbose){
1829  oocomm.communicator().barrier();
1830  if(oocomm.communicator().rank()==0)
1831  std::cout<<" Adding overlap indices took "<<
1832  time.elapsed()<<std::endl;
1833  }
1834  time.reset();
1835 
1836 
1837  if(color != MPI_UNDEFINED){
1838  outcomm->remoteIndices().setNeighbours(tneighbors);
1839  outcomm->remoteIndices().template rebuild<true>();
1840 
1841  }
1842 
1843  // release the memory
1844  delete[] sendTo;
1845 
1846  if(verbose){
1847  oocomm.communicator().barrier();
1848  if(oocomm.communicator().rank()==0)
1849  std::cout<<" Storing indexsets took "<<
1850  time.elapsed()<<std::endl;
1851  }
1852 
1853 #ifdef PERF_REPART
1854  // stop the time for step 4) and print the results
1855  t4=MPI_Wtime()-t4;
1856  tSum = t1 + t2 + t3 + t4;
1857  std::cout<<std::endl
1858  <<mype<<": WTime for step 1): "<<t1
1859  <<" 2): "<<t2
1860  <<" 3): "<<t3
1861  <<" 4): "<<t4
1862  <<" total: "<<tSum
1863  <<std::endl;
1864 #endif
1865 
1866  return color!=MPI_UNDEFINED;
1867 
1868  }
1869 #else
1870  template<class G, class P,class T1, class T2, class R>
1871  bool graphRepartition(const G& graph, P& oocomm, int nparts,
1872  P*& outcomm,
1873  R& redistInf,
1874  bool v=false)
1875  {
1876  if(nparts!=oocomm.size())
1877  DUNE_THROW(NotImplemented, "only available for MPI programs");
1878  }
1879 
1880 
1881  template<class G, class P,class T1, class T2, class R>
1882  bool commGraphRepartition(const G& graph, P& oocomm, int nparts,
1883  P*& outcomm,
1884  R& redistInf,
1885  bool v=false)
1886  {
1887  if(nparts!=oocomm.size())
1888  DUNE_THROW(NotImplemented, "only available for MPI programs");
1889  }
1890 #endif // HAVE_MPI
1891 } // end of namespace Dune
1892 #endif