#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/vecscatterimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

  Developer Notes:
    Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL;
    the type also automatically switches over to using inodes when enough exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/
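/*
   A minimal caller-side sketch of the recommendation above (the sizes and nonzero
   estimates are illustrative assumptions, not part of this file): calling both
   preallocation routines lets the same code run on one process or many.

     Mat A;
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);
     MatSetType(A,MATAIJ);
     MatSeqAIJSetPreallocation(A,5,NULL);         // used when the communicator has one process
     MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);  // used for multiple processes
*/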
/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) {
    ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr);
  }
  if (a->B) {
    ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt = 0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*) Y->data;
  PetscBool      cong;

  PetscFunctionBegin;
  ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
  if (Y->assembled && cong) {
    ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
  } else {
    ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}
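/*
   A minimal usage sketch of the routine above (illustrative only): note that norms must
   hold the GLOBAL number of columns on every process, since the partial sums are
   allreduced across the communicator.

     PetscInt  N;
     PetscReal *norms;
     MatGetSize(A,NULL,&N);
     PetscMalloc1(N,&norms);
     MatGetColumnNorms(A,NORM_2,norms);  // dispatches here for MATMPIAIJ
     PetscFree(norms);
*/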
PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  IS             sis,gis;
  PetscErrorCode ierr;
  const PetscInt *isis,*igis;
  PetscInt       n,*iis,nsis,ngis,rstart,i;

  PetscFunctionBegin;
  ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
  ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
  ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
  ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
  ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);

  ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
  ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
  ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
  n    = ngis + nsis;
  ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<n; i++) iis[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);

  ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
  ierr = ISDestroy(&sis);CHKERRQ(ierr);
  ierr = ISDestroy(&gis);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices

    Used by a preconditioner, hence PETSC_EXTERN
*/
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore = NULL;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal nonzeros in each row */
      ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal nonzeros in each row */
      ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else { /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each process
  has an order-N integer array) but it is fast to access.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
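/*
   A sketch of how the colmap built above is consumed elsewhere in this file (see
   MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ()): entries are stored shifted by +1
   so that 0 can mean "global column not present in the off-diagonal part". Here gcol
   is an assumed global column index.

     PetscInt gcol = ..., lcol;
   #if defined(PETSC_USE_CTABLE)
     PetscTableFind(aij->colmap,gcol+1,&lcol);
     lcol--;                         // lcol < 0 now means "not found"
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif
*/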
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \
{ \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure whether PetscLogFlops() will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } \
        else ap1[_i] = value; \
        inserted = PETSC_TRUE; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
    ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++;\
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
{ \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } \
        else ap2[_i] = value; \
        inserted = PETSC_TRUE; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\
    ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,NULL);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value  = 0.0;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A      = aij->A;
  Mat_SeqAIJ *a     = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa    = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B      = aij->B;
  Mat_SeqAIJ *b     = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba    = b->a;
  /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
   * cannot use "#if defined" inside a macro. */
  PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
    if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) value = roworiented ? v[i*n+j] : v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
          if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
        } else if (in[j] < 0) continue;
        else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
              inserted = PETSC_FALSE;
            } else if (col < 0) {
              if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
                ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
              } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
            }
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
          if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
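/*
   A minimal caller-side sketch (indices and value are illustrative assumptions) of the
   stashing path above: a process may set entries in rows it does not own, and the
   values reach the owning process during assembly via the stash.

     PetscInt    row = 0, col = 0;   // possibly owned by another process
     PetscScalar one = 1.0;
     MatSetValues(A,1,&row,1,&col,&one,ADD_VALUES);
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);  // scatters stashed entries to their owners
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
*/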
/*
    This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij    = (Mat_MPIAIJ*)mat->data;
  Mat        A       = aij->A; /* diagonal part of the matrix */
  Mat        B       = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ *a      = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b      = (Mat_SeqAIJ*)B->data;
  PetscInt   cstart  = mat->cmap->rstart,cend = mat->cmap->rend,col;
  PetscInt   *ailen  = a->ilen,*aj = a->j;
  PetscInt   *bilen  = b->ilen,*bj = b->j;
  PetscInt   am      = aij->A->rmap->n,j;
  PetscInt   diag_so_far = 0,dnz;
  PetscInt   offd_so_far = 0,onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(0);
}
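/*
   A small worked example of the split performed above (assumed numbers): suppose this
   process owns columns [cstart,cend) = [4,8) and one row has sorted global columns
   {1, 5, 7, 9}. Then 5 and 7 go to the diagonal block with local indices {1, 3}
   (global minus cstart), while 1 and 9 go to the off-diagonal block, which keeps
   global indices until assembly compacts them; the row ends up with dnz = 2, onz = 2.
*/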
/*
    This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
    Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
    would not be true and the more complex MatSetValues_MPIAIJ has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
{
  Mat_MPIAIJ  *aij   = (Mat_MPIAIJ*)mat->data;
  Mat         A      = aij->A; /* diagonal part of the matrix */
  Mat         B      = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ  *aijd  = (Mat_SeqAIJ*)(aij->A)->data,*aijo = (Mat_SeqAIJ*)(aij->B)->data;
  Mat_SeqAIJ  *a     = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ  *b     = (Mat_SeqAIJ*)B->data;
  PetscInt    cstart = mat->cmap->rstart,cend = mat->cmap->rend;
  PetscInt    *ailen = a->ilen,*aj = a->j;
  PetscInt    *bilen = b->ilen,*bj = b->j;
  PetscInt    am     = aij->A->rmap->n,j;
  PetscInt    *full_diag_i = aijd->i,*full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
  PetscInt    col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
  PetscScalar *aa = a->a,*ba = b->a;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz_row = onz_row = 0;
    rowstart_offd = full_offd_i[j];
    rowstart_diag = full_diag_i[j];
    /* Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
        aa[rowstart_diag+dnz_row] = mat_a[col];
        dnz_row++;
      } else { /* off-diagonal entries */
        bj[rowstart_offd+onz_row] = mat_j[col];
        ba[rowstart_offd+onz_row] = mat_a[col];
        onz_row++;
      }
    }
    ailen[j] = dnz_row;
    bilen[j] = onz_row;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr);
  }
#endif
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
      aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */
#endif
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = NULL;

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ       *mat = (Mat_MPIAIJ *) A->data;
  PetscObjectState sA, sB;
  PetscInt         *lrows;
  PetscInt         r, len;
  PetscBool        cong, lch, gch;
  PetscErrorCode   ierr;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
  ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }

  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   nnwA, nnwB;
    PetscBool  nnzA, nnzB;

    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
      aijA->nonew = 0;
    }
    if (!nnzB) {
      ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue;
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* reduce nonzerostate */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(0);
}
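/*
   A minimal caller-side sketch (the row indices are illustrative assumptions) of the
   routine above, as used for Dirichlet boundary conditions: zero the boundary rows,
   put 1.0 on the diagonal, and keep x and b consistent on those rows.

     PetscInt bcrows[] = {0, 1};        // assumed global boundary rows
     MatZeroRows(A,2,bcrows,1.0,x,b);   // also sets b[i] = 1.0*x[i] on those rows
*/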
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,r,m,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscMPIInt       p = 0;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off diagonal part of matrix */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
    if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
  }
  /* loop over all elements of off process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  /* overlap the communication of ghost values with the product against the diagonal block */
  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* add partial results together */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A,Bdia,Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscBool      lf;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
  ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* add partial results together */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
  ierr = PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  const PetscInt *garray = aij->garray;
  PetscInt       header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
  PetscInt       *rowlens;
  PetscInt       *colidxs;
  PetscScalar    *matvals;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  nz = A->nz + B->nz;

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M; header[2] = N; header[3] = nz;
  ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);

  /* fill in and store row lengths */
  ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
  for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr);
  ierr = PetscFree(rowlens);CHKERRQ(ierr);

  /* fill in and store column indices */
  ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
  for (cnt=0, i=0; i<m; i++) {
    for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      colidxs[cnt++] = garray[B->j[jb]];
    }
    for (ja=A->i[i]; ja<A->i[i+1]; ja++)
      colidxs[cnt++] = A->j[ja] + cs;
    for (; jb<B->i[i+1]; jb++)
      colidxs[cnt++] = garray[B->j[jb]];
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
  ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
  ierr = PetscFree(colidxs);CHKERRQ(ierr);

  /* fill in and store nonzero values */
  ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
  for (cnt=0, i=0; i<m; i++) {
    for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      matvals[cnt++] = B->a[jb];
    }
    for (ja=A->i[i]; ja<A->i[i+1]; ja++)
      matvals[cnt++] = A->a[ja];
    for (; jb<B->i[i+1]; jb++)
      matvals[cnt++] = B->a[jb];
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
  ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
  ierr = PetscFree(matvals);CHKERRQ(ierr);

  /* write block size option to the viewer's .info file */
  ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
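/*
   A minimal caller-side sketch of the binary path above (the file name is an assumed
   placeholder): viewing an MPIAIJ matrix with a binary viewer writes it collectively
   in the PETSc binary format.

     PetscViewer viewer;
     PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_WRITE,&viewer);
     MatView(A,viewer);   // dispatches to MatView_MPIAIJ_Binary() on multiple processes
     PetscViewerDestroy(&viewer);
*/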
#include <petscdraw.h>
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
      ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
      ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      for (i=0; i<(PetscInt)size; i++) {
        nmax = PetscMax(nmax,nz[i]);
        nmin = PetscMin(nmin,nz[i]);
        navg += nz[i];
      }
      ierr = PetscFree(nz);CHKERRQ(ierr);
      navg = navg/size;
      ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscBool inodes;

      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (iascii && size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow,iscol;

    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
    ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
    ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
    /* The commented code uses MatCreateSubMatrices instead */
    /*
    Mat *AA, A = NULL, Av;
    IS  isrow,iscol;

    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
    ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
      A    = AA[0];
      Av   = AA[0];
    }
    ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
    */
    ierr = ISDestroy(&iscol);CHKERRQ(ierr);
    ierr = ISDestroy(&isrow);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      if (((PetscObject)mat)->name) {
        ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
      }
      ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
entire matrix onto first processor */ 1469 Mat A = NULL, Av; 1470 IS isrow,iscol; 1471 1472 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1473 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1474 ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 1475 ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr); 1476 /* The commented code uses MatCreateSubMatrices instead */ 1477 /* 1478 Mat *AA, A = NULL, Av; 1479 IS isrow,iscol; 1480 1481 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1482 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1483 ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr); 1484 if (!rank) { 1485 ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr); 1486 A = AA[0]; 1487 Av = AA[0]; 1488 } 1489 ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr); 1490 */ 1491 ierr = ISDestroy(&iscol);CHKERRQ(ierr); 1492 ierr = ISDestroy(&isrow);CHKERRQ(ierr); 1493 /* 1494 Everyone has to call to draw the matrix since the graphics waits are 1495 synchronized across all processors that share the PetscDraw object 1496 */ 1497 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1498 if (!rank) { 1499 if (((PetscObject)mat)->name) { 1500 ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr); 1501 } 1502 ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr); 1503 } 1504 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1505 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1506 ierr = MatDestroy(&A);CHKERRQ(ierr); 1507 } 1508 PetscFunctionReturn(0); 1509 } 1510 1511 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1512 { 1513 PetscErrorCode ierr; 1514 PetscBool iascii,isdraw,issocket,isbinary; 1515 1516 PetscFunctionBegin; 1517 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1518 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1519 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1520 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1521 if (iascii || isdraw || isbinary || issocket) { 1522 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1523 } 1524 PetscFunctionReturn(0); 1525 } 1526 1527 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1528 { 1529 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1530 PetscErrorCode ierr; 1531 Vec bb1 = NULL; 1532 PetscBool hasop; 1533 1534 PetscFunctionBegin; 1535 if (flag == SOR_APPLY_UPPER) { 1536 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1537 PetscFunctionReturn(0); 1538 } 1539 1540 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1541 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1542 } 1543 1544 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1545 if (flag & SOR_ZERO_INITIAL_GUESS) { 1546 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1547 its--; 1548 } 1549 1550 while (its--) { 1551 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1552 
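/*
         The VecScatterBegin()/VecScatterEnd() pair here fills mat->lvec with the
         ghost (off-process) entries of xx, so that the off-diagonal product B*x
         can be formed locally below.  Each outer iteration is therefore a
         block-Jacobi step across processes with symmetric SOR applied to the
         diagonal block: x <- SOR(A_d, bb - B_o*x_ghost), where A_d = mat->A and
         B_o = mat->B in the notation of this file.
      */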
ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1553 1554 /* update rhs: bb1 = bb - B*x */ 1555 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1556 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1557 1558 /* local sweep */ 1559 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1560 } 1561 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1562 if (flag & SOR_ZERO_INITIAL_GUESS) { 1563 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1564 its--; 1565 } 1566 while (its--) { 1567 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1568 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1569 1570 /* update rhs: bb1 = bb - B*x */ 1571 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1572 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1573 1574 /* local sweep */ 1575 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1576 } 1577 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1578 if (flag & SOR_ZERO_INITIAL_GUESS) { 1579 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1580 its--; 1581 } 1582 while (its--) { 1583 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1584 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1585 1586 /* update rhs: bb1 = bb - B*x */ 1587 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1588 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1589 1590 /* local sweep */ 1591 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1592 } 1593 } else if (flag & SOR_EISENSTAT) { 1594 Vec xx1; 1595 1596 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1597 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1598 1599 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1600 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1601 if (!mat->diag) { 1602 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1603 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1604 } 1605 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1606 if (hasop) { 1607 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1608 } else { 1609 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1610 } 1611 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1612 1613 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1614 1615 /* local sweep */ 1616 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1617 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1618 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1619 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1620 1621 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1622 1623 matin->factorerrortype = mat->A->factorerrortype; 1624 PetscFunctionReturn(0); 1625 } 1626 1627 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1628 { 1629 Mat aA,aB,Aperm; 1630 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1631 PetscScalar *aa,*ba; 1632 PetscInt 
i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1633 PetscSF rowsf,sf; 1634 IS parcolp = NULL; 1635 PetscBool done; 1636 PetscErrorCode ierr; 1637 1638 PetscFunctionBegin; 1639 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1640 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1641 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1642 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1643 1644 /* Invert row permutation to find out where my rows should go */ 1645 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1646 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1647 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1648 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1649 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1650 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1651 1652 /* Invert column permutation to find out where my columns should go */ 1653 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1654 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1655 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1656 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1657 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1658 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1659 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1660 1661 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1662 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1663 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1664 1665 /* Find out where my gcols should go */ 1666 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1667 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1668 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1669 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1670 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1671 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1672 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1673 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1674 1675 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1676 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1677 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1678 for (i=0; i<m; i++) { 1679 PetscInt row = rdest[i]; 1680 PetscMPIInt rowner; 1681 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1682 for (j=ai[i]; j<ai[i+1]; j++) { 1683 PetscInt col = cdest[aj[j]]; 1684 PetscMPIInt cowner; 1685 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1686 if (rowner == cowner) dnnz[i]++; 1687 else onnz[i]++; 1688 } 1689 for (j=bi[i]; j<bi[i+1]; j++) { 1690 PetscInt col = gcdest[bj[j]]; 1691 PetscMPIInt cowner; 1692 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1693 if (rowner == cowner) dnnz[i]++; 1694 else onnz[i]++; 1695 } 1696 } 1697 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1698 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1699 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1700 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1701 ierr = 
PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1702 1703 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1704 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1705 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1706 for (i=0; i<m; i++) { 1707 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1708 PetscInt j0,rowlen; 1709 rowlen = ai[i+1] - ai[i]; 1710 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1711 for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1712 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1713 } 1714 rowlen = bi[i+1] - bi[i]; 1715 for (j0=j=0; j<rowlen; j0=j) { 1716 for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1717 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1718 } 1719 } 1720 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1721 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1722 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1723 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1724 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1725 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1726 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1727 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1728 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1729 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1730 *B = Aperm; 1731 PetscFunctionReturn(0); 1732 } 1733 1734 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1735 { 1736 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1737 PetscErrorCode ierr; 1738 1739 PetscFunctionBegin; 1740 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1741 if (ghosts) *ghosts = aij->garray; 1742 PetscFunctionReturn(0); 1743 } 1744 1745 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1746 { 1747 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1748 Mat A = mat->A,B = mat->B; 1749 PetscErrorCode ierr; 1750 PetscLogDouble isend[5],irecv[5]; 1751 1752 PetscFunctionBegin; 1753 info->block_size = 1.0; 1754 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1755 1756 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1757 isend[3] = info->memory; isend[4] = info->mallocs; 1758 1759 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1760 1761 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1762 isend[3] += info->memory; isend[4] += info->mallocs; 1763 if (flag == MAT_LOCAL) { 1764 info->nz_used = isend[0]; 1765 info->nz_allocated = isend[1]; 1766 info->nz_unneeded = isend[2]; 1767 info->memory = isend[3]; 1768 info->mallocs = isend[4]; 1769 } else if (flag == MAT_GLOBAL_MAX) { 1770 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1771 1772 info->nz_used = irecv[0]; 1773 info->nz_allocated = irecv[1]; 1774 info->nz_unneeded = irecv[2]; 1775 info->memory = irecv[3]; 1776 info->mallocs = irecv[4]; 1777 } else if (flag == MAT_GLOBAL_SUM) { 1778 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1779 1780 info->nz_used = irecv[0]; 1781 info->nz_allocated = irecv[1]; 1782 info->nz_unneeded = 
irecv[2]; 1783 info->memory = irecv[3]; 1784 info->mallocs = irecv[4]; 1785 } 1786 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1787 info->fill_ratio_needed = 0; 1788 info->factor_mallocs = 0; 1789 PetscFunctionReturn(0); 1790 } 1791 1792 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1793 { 1794 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1795 PetscErrorCode ierr; 1796 1797 PetscFunctionBegin; 1798 switch (op) { 1799 case MAT_NEW_NONZERO_LOCATIONS: 1800 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1801 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1802 case MAT_KEEP_NONZERO_PATTERN: 1803 case MAT_NEW_NONZERO_LOCATION_ERR: 1804 case MAT_USE_INODES: 1805 case MAT_IGNORE_ZERO_ENTRIES: 1806 MatCheckPreallocated(A,1); 1807 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1808 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1809 break; 1810 case MAT_ROW_ORIENTED: 1811 MatCheckPreallocated(A,1); 1812 a->roworiented = flg; 1813 1814 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1815 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1816 break; 1817 case MAT_NEW_DIAGONALS: 1818 case MAT_SORTED_FULL: 1819 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1820 break; 1821 case MAT_IGNORE_OFF_PROC_ENTRIES: 1822 a->donotstash = flg; 1823 break; 1824 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1825 case MAT_SPD: 1826 case MAT_SYMMETRIC: 1827 case MAT_STRUCTURALLY_SYMMETRIC: 1828 case MAT_HERMITIAN: 1829 case MAT_SYMMETRY_ETERNAL: 1830 break; 1831 case MAT_SUBMAT_SINGLEIS: 1832 A->submat_singleis = flg; 1833 break; 1834 case MAT_STRUCTURE_ONLY: 1835 /* The option is handled directly by MatSetOption() */ 1836 break; 1837 default: 1838 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1839 } 1840 PetscFunctionReturn(0); 1841 } 1842 1843 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1844 { 1845 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1846 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1847 PetscErrorCode ierr; 1848 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1849 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1850 PetscInt *cmap,*idx_p; 1851 1852 PetscFunctionBegin; 1853 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1854 mat->getrowactive = PETSC_TRUE; 1855 1856 if (!mat->rowvalues && (idx || v)) { 1857 /* 1858 allocate enough space to hold information from the longest row. 
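       MatGetRow() returns the merged row of the diagonal (A) and off-diagonal (B)
       parts, so the workspace must hold up to max_i (nnz_A(i) + nnz_B(i)) column
       indices and values; the loop below computes exactly that maximum.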
1859 */ 1860 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1861 PetscInt max = 1,tmp; 1862 for (i=0; i<matin->rmap->n; i++) { 1863 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1864 if (max < tmp) max = tmp; 1865 } 1866 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1867 } 1868 1869 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1870 lrow = row - rstart; 1871 1872 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1873 if (!v) {pvA = NULL; pvB = NULL;} 1874 if (!idx) {pcA = NULL; if (!v) pcB = NULL;} 1875 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1876 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1877 nztot = nzA + nzB; 1878 1879 cmap = mat->garray; 1880 if (v || idx) { 1881 if (nztot) { 1882 /* Sort by increasing column numbers, assuming A and B already sorted */ 1883 PetscInt imark = -1; 1884 if (v) { 1885 *v = v_p = mat->rowvalues; 1886 for (i=0; i<nzB; i++) { 1887 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1888 else break; 1889 } 1890 imark = i; 1891 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1892 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1893 } 1894 if (idx) { 1895 *idx = idx_p = mat->rowindices; 1896 if (imark > -1) { 1897 for (i=0; i<imark; i++) { 1898 idx_p[i] = cmap[cworkB[i]]; 1899 } 1900 } else { 1901 for (i=0; i<nzB; i++) { 1902 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1903 else break; 1904 } 1905 imark = i; 1906 } 1907 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1908 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1909 } 1910 } else { 1911 if (idx) *idx = NULL; 1912 if (v) *v = NULL; 1913 } 1914 } 1915 *nz = nztot; 1916 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1917 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1918 PetscFunctionReturn(0); 1919 } 1920 1921 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1922 { 1923 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1924 1925 PetscFunctionBegin; 1926 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1927 aij->getrowactive = PETSC_FALSE; 1928 PetscFunctionReturn(0); 1929 } 1930 1931 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1932 { 1933 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1934 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1935 PetscErrorCode ierr; 1936 PetscInt i,j,cstart = mat->cmap->rstart; 1937 PetscReal sum = 0.0; 1938 MatScalar *v; 1939 1940 PetscFunctionBegin; 1941 if (aij->size == 1) { 1942 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1943 } else { 1944 if (type == NORM_FROBENIUS) { 1945 v = amat->a; 1946 for (i=0; i<amat->nz; i++) { 1947 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1948 } 1949 v = bmat->a; 1950 for (i=0; i<bmat->nz; i++) { 1951 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1952 } 1953 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1954 *norm = PetscSqrtReal(*norm); 1955 ierr = PetscLogFlops(2.0*amat->nz+2.0*bmat->nz);CHKERRQ(ierr); 1956 } else if (type == NORM_1) { /* max column norm */ 1957 PetscReal *tmp,*tmp2; 1958 PetscInt *jj,*garray = aij->garray; 1959 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1960 ierr = 
PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1961 *norm = 0.0; 1962 v = amat->a; jj = amat->j; 1963 for (j=0; j<amat->nz; j++) { 1964 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1965 } 1966 v = bmat->a; jj = bmat->j; 1967 for (j=0; j<bmat->nz; j++) { 1968 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1969 } 1970 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1971 for (j=0; j<mat->cmap->N; j++) { 1972 if (tmp2[j] > *norm) *norm = tmp2[j]; 1973 } 1974 ierr = PetscFree(tmp);CHKERRQ(ierr); 1975 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1976 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1977 } else if (type == NORM_INFINITY) { /* max row norm */ 1978 PetscReal ntemp = 0.0; 1979 for (j=0; j<aij->A->rmap->n; j++) { 1980 v = amat->a + amat->i[j]; 1981 sum = 0.0; 1982 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1983 sum += PetscAbsScalar(*v); v++; 1984 } 1985 v = bmat->a + bmat->i[j]; 1986 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1987 sum += PetscAbsScalar(*v); v++; 1988 } 1989 if (sum > ntemp) ntemp = sum; 1990 } 1991 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1992 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1993 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1994 } 1995 PetscFunctionReturn(0); 1996 } 1997 1998 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1999 { 2000 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 2001 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 2002 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol; 2003 const PetscInt *ai,*aj,*bi,*bj,*B_diag_i; 2004 PetscErrorCode ierr; 2005 Mat B,A_diag,*B_diag; 2006 const MatScalar *array; 2007 2008 PetscFunctionBegin; 2009 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2010 ai = Aloc->i; aj = Aloc->j; 2011 bi = Bloc->i; bj = Bloc->j; 2012 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2013 PetscInt *d_nnz,*g_nnz,*o_nnz; 2014 PetscSFNode *oloc; 2015 PETSC_UNUSED PetscSF sf; 2016 2017 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 2018 /* compute d_nnz for preallocation */ 2019 ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr); 2020 for (i=0; i<ai[ma]; i++) { 2021 d_nnz[aj[i]]++; 2022 } 2023 /* compute local off-diagonal contributions */ 2024 ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr); 2025 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2026 /* map those to global */ 2027 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2028 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2029 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2030 ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr); 2031 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2032 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2033 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2034 2035 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2036 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2037 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 2038 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2039 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2040 ierr = 
PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2041 } else { 2042 B = *matout; 2043 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2044 } 2045 2046 b = (Mat_MPIAIJ*)B->data; 2047 A_diag = a->A; 2048 B_diag = &b->A; 2049 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 2050 A_diag_ncol = A_diag->cmap->N; 2051 B_diag_ilen = sub_B_diag->ilen; 2052 B_diag_i = sub_B_diag->i; 2053 2054 /* Set ilen for diagonal of B */ 2055 for (i=0; i<A_diag_ncol; i++) { 2056 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 2057 } 2058 2059 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 2060 very quickly (=without using MatSetValues), because all writes are local. */ 2061 ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr); 2062 2063 /* copy over the B part */ 2064 ierr = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr); 2065 array = Bloc->a; 2066 row = A->rmap->rstart; 2067 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2068 cols_tmp = cols; 2069 for (i=0; i<mb; i++) { 2070 ncol = bi[i+1]-bi[i]; 2071 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2072 row++; 2073 array += ncol; cols_tmp += ncol; 2074 } 2075 ierr = PetscFree(cols);CHKERRQ(ierr); 2076 2077 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2078 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2079 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 2080 *matout = B; 2081 } else { 2082 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2083 } 2084 PetscFunctionReturn(0); 2085 } 2086 2087 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2088 { 2089 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2090 Mat a = aij->A,b = aij->B; 2091 PetscErrorCode ierr; 2092 PetscInt s1,s2,s3; 2093 2094 PetscFunctionBegin; 2095 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2096 if (rr) { 2097 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2098 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2099 /* Overlap communication with computation. 
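     VecScatterBegin() below only posts the communication that gathers the ghost
     entries of rr into aij->lvec; the left scaling of the off-diagonal block and
     the scaling of the diagonal block proceed while the messages are in flight,
     and VecScatterEnd() completes the transfer just before the ghost values are
     needed to right-scale the off-diagonal block.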
*/ 2100 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2101 } 2102 if (ll) { 2103 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2104 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2105 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 2106 } 2107 /* scale the diagonal block */ 2108 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2109 2110 if (rr) { 2111 /* Do a scatter end and then right scale the off-diagonal block */ 2112 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2113 ierr = (*b->ops->diagonalscale)(b,NULL,aij->lvec);CHKERRQ(ierr); 2114 } 2115 PetscFunctionReturn(0); 2116 } 2117 2118 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2119 { 2120 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2121 PetscErrorCode ierr; 2122 2123 PetscFunctionBegin; 2124 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2125 PetscFunctionReturn(0); 2126 } 2127 2128 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2129 { 2130 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2131 Mat a,b,c,d; 2132 PetscBool flg; 2133 PetscErrorCode ierr; 2134 2135 PetscFunctionBegin; 2136 a = matA->A; b = matA->B; 2137 c = matB->A; d = matB->B; 2138 2139 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2140 if (flg) { 2141 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2142 } 2143 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2144 PetscFunctionReturn(0); 2145 } 2146 2147 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2148 { 2149 PetscErrorCode ierr; 2150 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2151 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2152 2153 PetscFunctionBegin; 2154 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2155 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2156 /* because of the column compression in the off-processor part of the matrix a->B, 2157 the number of columns in a->B and b->B may be different, hence we cannot call 2158 the MatCopy() directly on the two parts. If need be, we can provide a more 2159 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2160 then copying the submatrices */ 2161 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2162 } else { 2163 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2164 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2165 } 2166 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2167 PetscFunctionReturn(0); 2168 } 2169 2170 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2171 { 2172 PetscErrorCode ierr; 2173 2174 PetscFunctionBegin; 2175 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr); 2176 PetscFunctionReturn(0); 2177 } 2178 2179 /* 2180 Computes the number of nonzeros per row needed for preallocation when X and Y 2181 have different nonzero structure. 
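  The count for row i is the size of the union of the two sorted column sets,
  obtained by a two-pointer merge over the global column numbers xltog[xjj[]]
  and yltog[yjj[]].  Illustrative example: if a row of X has global columns
  {0,3,5} and the same row of Y has {3,4}, the union is {0,3,4,5} and the
  resulting nnz for that row is 4.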
2182 */ 2183 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2184 { 2185 PetscInt i,j,k,nzx,nzy; 2186 2187 PetscFunctionBegin; 2188 /* Set the number of nonzeros in the new matrix */ 2189 for (i=0; i<m; i++) { 2190 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2191 nzx = xi[i+1] - xi[i]; 2192 nzy = yi[i+1] - yi[i]; 2193 nnz[i] = 0; 2194 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2195 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2196 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2197 nnz[i]++; 2198 } 2199 for (; k<nzy; k++) nnz[i]++; 2200 } 2201 PetscFunctionReturn(0); 2202 } 2203 2204 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2205 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2206 { 2207 PetscErrorCode ierr; 2208 PetscInt m = Y->rmap->N; 2209 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2210 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2211 2212 PetscFunctionBegin; 2213 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2214 PetscFunctionReturn(0); 2215 } 2216 2217 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2218 { 2219 PetscErrorCode ierr; 2220 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2221 PetscBLASInt bnz,one=1; 2222 Mat_SeqAIJ *x,*y; 2223 2224 PetscFunctionBegin; 2225 if (str == SAME_NONZERO_PATTERN) { 2226 PetscScalar alpha = a; 2227 x = (Mat_SeqAIJ*)xx->A->data; 2228 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2229 y = (Mat_SeqAIJ*)yy->A->data; 2230 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2231 x = (Mat_SeqAIJ*)xx->B->data; 2232 y = (Mat_SeqAIJ*)yy->B->data; 2233 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2234 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2235 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2236 /* the MatAXPY_Basic* subroutines call MatAssembly(), so the matrix on the GPU 2237 will be updated */ 2238 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 2239 if (Y->offloadmask != PETSC_OFFLOAD_UNALLOCATED) { 2240 Y->offloadmask = PETSC_OFFLOAD_CPU; 2241 } 2242 #endif 2243 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X are a subset of Y's */ 2244 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2245 } else { 2246 Mat B; 2247 PetscInt *nnz_d,*nnz_o; 2248 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2249 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2250 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2251 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2252 ierr = MatSetLayouts(B,Y->rmap,Y->cmap);CHKERRQ(ierr); 2253 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2254 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2255 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2256 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2257 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2258 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2259 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2260 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2261 } 2262 PetscFunctionReturn(0); 2263 }
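/*
   A minimal usage sketch for MatAXPY() with MATMPIAIJ operands, illustrating the
   three MatStructure paths handled above (the matrices X and Y and the value of
   alpha are illustrative, not part of this file):

     Mat         X,Y;            // both MATMPIAIJ, same layouts, assembled
     PetscScalar alpha = 2.0;

     // fastest path: identical nonzero patterns, combined with two BLAS axpy calls
     ierr = MatAXPY(Y,alpha,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
     // nonzeros of X are a subset of those of Y: falls back to MatAXPY_Basic()
     ierr = MatAXPY(Y,alpha,X,SUBSET_NONZERO_PATTERN);CHKERRQ(ierr);
     // unrelated patterns: a matrix with the union pattern is preallocated
     // (see MatAXPYGetPreallocation_MPIAIJ() above) and Y is replaced in place
     ierr = MatAXPY(Y,alpha,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
*/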
2264 2265 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2266 2267 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2268 { 2269 #if defined(PETSC_USE_COMPLEX) 2270 PetscErrorCode ierr; 2271 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2272 2273 PetscFunctionBegin; 2274 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2275 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2276 #else 2277 PetscFunctionBegin; 2278 #endif 2279 PetscFunctionReturn(0); 2280 } 2281 2282 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2283 { 2284 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2285 PetscErrorCode ierr; 2286 2287 PetscFunctionBegin; 2288 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2289 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2290 PetscFunctionReturn(0); 2291 } 2292 2293 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2294 { 2295 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2296 PetscErrorCode ierr; 2297 2298 PetscFunctionBegin; 2299 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2300 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2301 PetscFunctionReturn(0); 2302 } 2303 2304 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2305 { 2306 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2307 PetscErrorCode ierr; 2308 PetscInt i,*idxb = NULL; 2309 PetscScalar *va,*vb; 2310 Vec vtmp; 2311 2312 PetscFunctionBegin; 2313 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2314 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2315 if (idx) { 2316 for (i=0; i<A->rmap->n; i++) { 2317 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2318 } 2319 } 2320 2321 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2322 if (idx) { 2323 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2324 } 2325 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2326 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2327 2328 for (i=0; i<A->rmap->n; i++) { 2329 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2330 va[i] = vb[i]; 2331 if (idx) idx[i] = a->garray[idxb[i]]; 2332 } 2333 } 2334 2335 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2336 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2337 ierr = PetscFree(idxb);CHKERRQ(ierr); 2338 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2339 PetscFunctionReturn(0); 2340 } 2341 2342 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2343 { 2344 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2345 PetscErrorCode ierr; 2346 PetscInt i,*idxb = NULL; 2347 PetscScalar *va,*vb; 2348 Vec vtmp; 2349 2350 PetscFunctionBegin; 2351 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2352 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2353 if (idx) { 2354 for (i=0; i<A->rmap->n; i++) { 2355 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2356 } 2357 } 2358 2359 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2360 if (idx) { 2361 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2362 } 2363 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2364 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2365 2366 for (i=0; i<A->rmap->n; i++) { 2367 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2368 va[i] = vb[i]; 2369 if (idx) idx[i] = a->garray[idxb[i]]; 2370 } 2371 } 2372 2373 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2374 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2375 ierr = PetscFree(idxb);CHKERRQ(ierr); 2376 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2377 PetscFunctionReturn(0); 2378 } 2379 2380 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2381 { 2382 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2383 PetscInt m = A->rmap->n,n = A->cmap->n; 2384 PetscInt cstart =
A->cmap->rstart,cend = A->cmap->rend; 2385 PetscInt *cmap = mat->garray; 2386 PetscInt *diagIdx, *offdiagIdx; 2387 Vec diagV, offdiagV; 2388 PetscScalar *a, *diagA, *offdiagA, *ba; 2389 PetscInt r,j,col,ncols,*bi,*bj; 2390 PetscErrorCode ierr; 2391 Mat B = mat->B; 2392 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2393 2394 PetscFunctionBegin; 2395 /* When a process holds entire A and other processes have no entry */ 2396 if (A->cmap->N == n) { 2397 ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr); 2398 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr); 2399 ierr = MatGetRowMin(mat->A,diagV,idx);CHKERRQ(ierr); 2400 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2401 ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr); 2402 PetscFunctionReturn(0); 2403 } else if (n == 0) { 2404 if (m) { 2405 ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr); 2406 for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;} 2407 ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr); 2408 } 2409 PetscFunctionReturn(0); 2410 } 2411 2412 ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr); 2413 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr); 2414 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr); 2415 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2416 2417 /* Get offdiagIdx[] for implicit 0.0 */ 2418 ba = b->a; 2419 bi = b->i; 2420 bj = b->j; 2421 ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2422 for (r = 0; r < m; r++) { 2423 ncols = bi[r+1] - bi[r]; 2424 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2425 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2426 } else { /* Brow is sparse so already KNOW minimum is 0.0 or lower */ 2427 offdiagA[r] = 0.0; 2428 2429 /* Find first hole in the cmap */ 2430 for (j=0; j<ncols; j++) { 2431 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2432 if (col > j && j < cstart) { 2433 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2434 break; 2435 } else if (col > j + n && j >= cstart) { 2436 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2437 break; 2438 } 2439 } 2440 if (j == ncols && B->cmap->N < A->cmap->N - n) { 2441 /* a hole is outside compressed Bcols */ 2442 if (ncols == 0) { 2443 if (cstart) { 2444 offdiagIdx[r] = 0; 2445 } else offdiagIdx[r] = cend; 2446 } else { /* ncols > 0 */ 2447 offdiagIdx[r] = cmap[ncols-1] + 1; 2448 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2449 } 2450 } 2451 } 2452 2453 for (j=0; j<ncols; j++) { 2454 if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2455 ba++; bj++; 2456 } 2457 } 2458 2459 ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr); 2460 ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2461 for (r = 0; r < m; ++r) { 2462 if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) { 2463 a[r] = diagA[r]; 2464 if (idx) idx[r] = cstart + diagIdx[r]; 2465 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2466 a[r] = diagA[r]; 2467 if (idx) { 2468 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2469 idx[r] = cstart + diagIdx[r]; 2470 } else idx[r] = offdiagIdx[r]; 2471 } 2472 } else { 2473 a[r] = offdiagA[r]; 2474 if (idx) idx[r] = offdiagIdx[r]; 2475 } 2476 } 2477 ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr); 2478 ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2479 ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2480 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2481
ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2482 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2483 PetscFunctionReturn(0); 2484 } 2485 2486 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2487 { 2488 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 2489 PetscInt m = A->rmap->n,n = A->cmap->n; 2490 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2491 PetscInt *cmap = mat->garray; 2492 PetscInt *diagIdx, *offdiagIdx; 2493 Vec diagV, offdiagV; 2494 PetscScalar *a, *diagA, *offdiagA, *ba; 2495 PetscInt r,j,col,ncols,*bi,*bj; 2496 PetscErrorCode ierr; 2497 Mat B = mat->B; 2498 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2499 2500 PetscFunctionBegin; 2501 /* When a process holds entire A and other processes have no entry */ 2502 if (A->cmap->N == n) { 2503 ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr); 2504 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr); 2505 ierr = MatGetRowMax(mat->A,diagV,idx);CHKERRQ(ierr); 2506 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2507 ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr); 2508 PetscFunctionReturn(0); 2509 } else if (n == 0) { 2510 if (m) { 2511 ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr); 2512 for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;} 2513 ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr); 2514 } 2515 PetscFunctionReturn(0); 2516 } 2517 2518 ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr); 2519 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr); 2520 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr); 2521 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2522 2523 /* Get offdiagIdx[] for implicit 0.0 */ 2524 ba = b->a; 2525 bi = b->i; 2526 bj = b->j; 2527 ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2528 for (r = 0; r < m; r++) { 2529 ncols = bi[r+1] - bi[r]; 2530 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2531 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2532 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2533 offdiagA[r] = 0.0; 2534 2535 /* Find first hole in the cmap */ 2536 for (j=0; j<ncols; j++) { 2537 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2538 if (col > j && j < cstart) { 2539 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2540 break; 2541 } else if (col > j + n && j >= cstart) { 2542 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2543 break; 2544 } 2545 } 2546 if (j == ncols && B->cmap->N < A->cmap->N - n) { 2547 /* a hole is outside compressed Bcols */ 2548 if (ncols == 0) { 2549 if (cstart) { 2550 offdiagIdx[r] = 0; 2551 } else offdiagIdx[r] = cend; 2552 } else { /* ncols > 0 */ 2553 offdiagIdx[r] = cmap[ncols-1] + 1; 2554 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2555 } 2556 } 2557 } 2558 2559 for (j=0; j<ncols; j++) { 2560 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2561 ba++; bj++; 2562 } 2563 } 2564 2565 ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr); 2566 ierr = VecGetArrayRead(diagV,(const PetscScalar**)&diagA);CHKERRQ(ierr); 2567 for (r = 0; r < m; ++r) { 2568 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2569 a[r] = diagA[r]; 2570 if (idx) idx[r] = cstart + diagIdx[r]; 2571 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2572 a[r] = diagA[r]; 2573 if (idx) { 2574 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2575 idx[r] = cstart + diagIdx[r]; 2576 } else idx[r] = offdiagIdx[r]; 2577 } 2578 } else { 
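/* the off-diagonal part holds a strictly larger value; take it together with its (already global) column index offdiagIdx[r] */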
2579 a[r] = offdiagA[r]; 2580 if (idx) idx[r] = offdiagIdx[r]; 2581 } 2582 } 2583 ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr); 2584 ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2585 ierr = VecRestoreArrayWrite(offdiagV,&offdiagA);CHKERRQ(ierr); 2586 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2587 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2588 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2589 PetscFunctionReturn(0); 2590 } 2591 2592 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2593 { 2594 PetscErrorCode ierr; 2595 Mat *dummy; 2596 2597 PetscFunctionBegin; 2598 ierr = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2599 *newmat = *dummy; 2600 ierr = PetscFree(dummy);CHKERRQ(ierr); 2601 PetscFunctionReturn(0); 2602 } 2603 2604 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2605 { 2606 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2607 PetscErrorCode ierr; 2608 2609 PetscFunctionBegin; 2610 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2611 A->factorerrortype = a->A->factorerrortype; 2612 PetscFunctionReturn(0); 2613 } 2614 2615 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2616 { 2617 PetscErrorCode ierr; 2618 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2619 2620 PetscFunctionBegin; 2621 if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2622 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2623 if (x->assembled) { 2624 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2625 } else { 2626 ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr); 2627 } 2628 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2629 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2630 PetscFunctionReturn(0); 2631 } 2632 2633 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2634 { 2635 PetscFunctionBegin; 2636 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2637 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2638 PetscFunctionReturn(0); 2639 } 2640 2641 /*@ 2642 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2643 2644 Collective on Mat 2645 2646 Input Parameters: 2647 + A - the matrix 2648 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2649 2650 Level: advanced 2651 2652 @*/ 2653 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2654 { 2655 PetscErrorCode ierr; 2656 2657 PetscFunctionBegin; 2658 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2659 PetscFunctionReturn(0); 2660 } 2661 2662 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2663 { 2664 PetscErrorCode ierr; 2665 PetscBool sc = PETSC_FALSE,flg; 2666 2667 PetscFunctionBegin; 2668 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2669 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2670 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2671 if (flg) { 2672 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 
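/* equivalent to the user calling MatMPIAIJSetUseScalableIncreaseOverlap() directly;
       e.g. the scalable algorithm can be requested at run time with the option
       -mat_increase_overlap_scalable 1 (illustrative command line usage) */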
2673 } 2674 ierr = PetscOptionsTail();CHKERRQ(ierr); 2675 PetscFunctionReturn(0); 2676 } 2677 2678 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2679 { 2680 PetscErrorCode ierr; 2681 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2682 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2683 2684 PetscFunctionBegin; 2685 if (!Y->preallocated) { 2686 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2687 } else if (!aij->nz) { 2688 PetscInt nonew = aij->nonew; 2689 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2690 aij->nonew = nonew; 2691 } 2692 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2693 PetscFunctionReturn(0); 2694 } 2695 2696 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2697 { 2698 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2699 PetscErrorCode ierr; 2700 2701 PetscFunctionBegin; 2702 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2703 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2704 if (d) { 2705 PetscInt rstart; 2706 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2707 *d += rstart; 2708 2709 } 2710 PetscFunctionReturn(0); 2711 } 2712 2713 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag) 2714 { 2715 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2716 PetscErrorCode ierr; 2717 2718 PetscFunctionBegin; 2719 ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr); 2720 PetscFunctionReturn(0); 2721 } 2722 2723 /* -------------------------------------------------------------------*/ 2724 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2725 MatGetRow_MPIAIJ, 2726 MatRestoreRow_MPIAIJ, 2727 MatMult_MPIAIJ, 2728 /* 4*/ MatMultAdd_MPIAIJ, 2729 MatMultTranspose_MPIAIJ, 2730 MatMultTransposeAdd_MPIAIJ, 2731 NULL, 2732 NULL, 2733 NULL, 2734 /*10*/ NULL, 2735 NULL, 2736 NULL, 2737 MatSOR_MPIAIJ, 2738 MatTranspose_MPIAIJ, 2739 /*15*/ MatGetInfo_MPIAIJ, 2740 MatEqual_MPIAIJ, 2741 MatGetDiagonal_MPIAIJ, 2742 MatDiagonalScale_MPIAIJ, 2743 MatNorm_MPIAIJ, 2744 /*20*/ MatAssemblyBegin_MPIAIJ, 2745 MatAssemblyEnd_MPIAIJ, 2746 MatSetOption_MPIAIJ, 2747 MatZeroEntries_MPIAIJ, 2748 /*24*/ MatZeroRows_MPIAIJ, 2749 NULL, 2750 NULL, 2751 NULL, 2752 NULL, 2753 /*29*/ MatSetUp_MPIAIJ, 2754 NULL, 2755 NULL, 2756 MatGetDiagonalBlock_MPIAIJ, 2757 NULL, 2758 /*34*/ MatDuplicate_MPIAIJ, 2759 NULL, 2760 NULL, 2761 NULL, 2762 NULL, 2763 /*39*/ MatAXPY_MPIAIJ, 2764 MatCreateSubMatrices_MPIAIJ, 2765 MatIncreaseOverlap_MPIAIJ, 2766 MatGetValues_MPIAIJ, 2767 MatCopy_MPIAIJ, 2768 /*44*/ MatGetRowMax_MPIAIJ, 2769 MatScale_MPIAIJ, 2770 MatShift_MPIAIJ, 2771 MatDiagonalSet_MPIAIJ, 2772 MatZeroRowsColumns_MPIAIJ, 2773 /*49*/ MatSetRandom_MPIAIJ, 2774 NULL, 2775 NULL, 2776 NULL, 2777 NULL, 2778 /*54*/ MatFDColoringCreate_MPIXAIJ, 2779 NULL, 2780 MatSetUnfactored_MPIAIJ, 2781 MatPermute_MPIAIJ, 2782 NULL, 2783 /*59*/ MatCreateSubMatrix_MPIAIJ, 2784 MatDestroy_MPIAIJ, 2785 MatView_MPIAIJ, 2786 NULL, 2787 NULL, 2788 /*64*/ NULL, 2789 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2790 NULL, 2791 NULL, 2792 NULL, 2793 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2794 MatGetRowMinAbs_MPIAIJ, 2795 NULL, 2796 NULL, 2797 NULL, 2798 NULL, 2799 /*75*/ MatFDColoringApply_AIJ, 2800 MatSetFromOptions_MPIAIJ, 2801 NULL, 2802 NULL, 2803 MatFindZeroDiagonals_MPIAIJ, 2804 /*80*/ NULL, 2805 NULL, 2806 NULL, 2807 /*83*/ MatLoad_MPIAIJ, 2808 MatIsSymmetric_MPIAIJ, 2809 NULL, 2810 NULL, 2811 NULL, 2812 NULL, 2813 /*89*/ NULL, 2814 NULL, 
2815 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2816 NULL, 2817 NULL, 2818 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2819 NULL, 2820 NULL, 2821 NULL, 2822 MatBindToCPU_MPIAIJ, 2823 /*99*/ MatProductSetFromOptions_MPIAIJ, 2824 NULL, 2825 NULL, 2826 MatConjugate_MPIAIJ, 2827 NULL, 2828 /*104*/MatSetValuesRow_MPIAIJ, 2829 MatRealPart_MPIAIJ, 2830 MatImaginaryPart_MPIAIJ, 2831 NULL, 2832 NULL, 2833 /*109*/NULL, 2834 NULL, 2835 MatGetRowMin_MPIAIJ, 2836 NULL, 2837 MatMissingDiagonal_MPIAIJ, 2838 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2839 NULL, 2840 MatGetGhosts_MPIAIJ, 2841 NULL, 2842 NULL, 2843 /*119*/NULL, 2844 NULL, 2845 NULL, 2846 NULL, 2847 MatGetMultiProcBlock_MPIAIJ, 2848 /*124*/MatFindNonzeroRows_MPIAIJ, 2849 MatGetColumnNorms_MPIAIJ, 2850 MatInvertBlockDiagonal_MPIAIJ, 2851 MatInvertVariableBlockDiagonal_MPIAIJ, 2852 MatCreateSubMatricesMPI_MPIAIJ, 2853 /*129*/NULL, 2854 NULL, 2855 NULL, 2856 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2857 NULL, 2858 /*134*/NULL, 2859 NULL, 2860 NULL, 2861 NULL, 2862 NULL, 2863 /*139*/MatSetBlockSizes_MPIAIJ, 2864 NULL, 2865 NULL, 2866 MatFDColoringSetUp_MPIXAIJ, 2867 MatFindOffBlockDiagonalEntries_MPIAIJ, 2868 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2869 /*145*/NULL, 2870 NULL, 2871 NULL 2872 }; 2873 2874 /* ----------------------------------------------------------------------------------------*/ 2875 2876 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2877 { 2878 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2879 PetscErrorCode ierr; 2880 2881 PetscFunctionBegin; 2882 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2883 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2884 PetscFunctionReturn(0); 2885 } 2886 2887 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2888 { 2889 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2890 PetscErrorCode ierr; 2891 2892 PetscFunctionBegin; 2893 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2894 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2895 PetscFunctionReturn(0); 2896 } 2897 2898 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2899 { 2900 Mat_MPIAIJ *b; 2901 PetscErrorCode ierr; 2902 PetscMPIInt size; 2903 2904 PetscFunctionBegin; 2905 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2906 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2907 b = (Mat_MPIAIJ*)B->data; 2908 2909 #if defined(PETSC_USE_CTABLE) 2910 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2911 #else 2912 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2913 #endif 2914 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2915 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2916 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2917 2918 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2919 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 2920 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2921 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2922 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? 
B->cmap->N : 0);CHKERRQ(ierr); 2923 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2924 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2925 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2926 2927 if (!B->preallocated) { 2928 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2929 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2930 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2931 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2932 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2933 } 2934 2935 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2936 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2937 B->preallocated = PETSC_TRUE; 2938 B->was_assembled = PETSC_FALSE; 2939 B->assembled = PETSC_FALSE; 2940 PetscFunctionReturn(0); 2941 } 2942 2943 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2944 { 2945 Mat_MPIAIJ *b; 2946 PetscErrorCode ierr; 2947 2948 PetscFunctionBegin; 2949 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2950 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2951 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2952 b = (Mat_MPIAIJ*)B->data; 2953 2954 #if defined(PETSC_USE_CTABLE) 2955 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2956 #else 2957 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2958 #endif 2959 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2960 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2961 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2962 2963 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2964 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2965 B->preallocated = PETSC_TRUE; 2966 B->was_assembled = PETSC_FALSE; 2967 B->assembled = PETSC_FALSE; 2968 PetscFunctionReturn(0); 2969 } 2970 2971 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2972 { 2973 Mat mat; 2974 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2975 PetscErrorCode ierr; 2976 2977 PetscFunctionBegin; 2978 *newmat = NULL; 2979 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2980 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2981 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2982 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2983 a = (Mat_MPIAIJ*)mat->data; 2984 2985 mat->factortype = matin->factortype; 2986 mat->assembled = matin->assembled; 2987 mat->insertmode = NOT_SET_VALUES; 2988 mat->preallocated = matin->preallocated; 2989 2990 a->size = oldmat->size; 2991 a->rank = oldmat->rank; 2992 a->donotstash = oldmat->donotstash; 2993 a->roworiented = oldmat->roworiented; 2994 a->rowindices = NULL; 2995 a->rowvalues = NULL; 2996 a->getrowactive = PETSC_FALSE; 2997 2998 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2999 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3000 3001 if (oldmat->colmap) { 3002 #if defined(PETSC_USE_CTABLE) 3003 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3004 #else 3005 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 3006 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3007 ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr); 3008 #endif 3009 } else a->colmap = NULL; 3010 if (oldmat->garray) { 3011 PetscInt len; 3012 len = oldmat->B->cmap->n; 3013 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 3014 
ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3015 if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); } 3016 } else a->garray = NULL; 3017 3018 /* It may happen MatDuplicate is called with a non-assembled matrix 3019 In fact, MatDuplicate only requires the matrix to be preallocated 3020 This may happen inside a DMCreateMatrix_Shell */ 3021 if (oldmat->lvec) { 3022 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3023 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 3024 } 3025 if (oldmat->Mvctx) { 3026 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3027 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 3028 } 3029 if (oldmat->Mvctx_mpi1) { 3030 ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr); 3031 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr); 3032 } 3033 3034 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3035 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 3036 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3037 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3038 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3039 *newmat = mat; 3040 PetscFunctionReturn(0); 3041 } 3042 3043 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3044 { 3045 PetscBool isbinary, ishdf5; 3046 PetscErrorCode ierr; 3047 3048 PetscFunctionBegin; 3049 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 3050 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 3051 /* force binary viewer to load .info file if it has not yet done so */ 3052 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3053 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 3054 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr); 3055 if (isbinary) { 3056 ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr); 3057 } else if (ishdf5) { 3058 #if defined(PETSC_HAVE_HDF5) 3059 ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr); 3060 #else 3061 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 3062 #endif 3063 } else { 3064 SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 3065 } 3066 PetscFunctionReturn(0); 3067 } 3068 3069 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 3070 { 3071 PetscInt header[4],M,N,m,nz,rows,cols,sum,i; 3072 PetscInt *rowidxs,*colidxs; 3073 PetscScalar *matvals; 3074 PetscErrorCode ierr; 3075 3076 PetscFunctionBegin; 3077 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3078 3079 /* read in matrix header */ 3080 ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr); 3081 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 3082 M = header[1]; N = header[2]; nz = header[3]; 3083 if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M); 3084 if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is 
negative",N); 3085 if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ"); 3086 3087 /* set block sizes from the viewer's .info file */ 3088 ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 3089 /* set global sizes if not set already */ 3090 if (mat->rmap->N < 0) mat->rmap->N = M; 3091 if (mat->cmap->N < 0) mat->cmap->N = N; 3092 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 3093 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 3094 3095 /* check if the matrix sizes are correct */ 3096 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3097 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols); 3098 3099 /* read in row lengths and build row indices */ 3100 ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr); 3101 ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr); 3102 ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr); 3103 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 3104 ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr); 3105 if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum); 3106 /* read in column indices and matrix values */ 3107 ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr); 3108 ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 3109 ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 3110 /* store matrix indices and values */ 3111 ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr); 3112 ierr = PetscFree(rowidxs);CHKERRQ(ierr); 3113 ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr); 3114 PetscFunctionReturn(0); 3115 } 3116 3117 /* Not scalable because of ISAllGather() unless getting all columns. 
*/ 3118 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3119 { 3120 PetscErrorCode ierr; 3121 IS iscol_local; 3122 PetscBool isstride; 3123 PetscMPIInt lisstride=0,gisstride; 3124 3125 PetscFunctionBegin; 3126 /* check if we are grabbing all columns */ 3127 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3128 3129 if (isstride) { 3130 PetscInt start,len,mstart,mlen; 3131 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3132 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3133 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3134 if (mstart == start && mlen-mstart == len) lisstride = 1; 3135 } 3136 3137 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3138 if (gisstride) { 3139 PetscInt N; 3140 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3141 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr); 3142 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3143 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3144 } else { 3145 PetscInt cbs; 3146 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3147 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3148 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3149 } 3150 3151 *isseq = iscol_local; 3152 PetscFunctionReturn(0); 3153 } 3154 3155 /* 3156 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3157 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3158 3159 Input Parameters: 3160 mat - matrix 3161 isrow - parallel row index set; its local indices are a subset of the local rows of mat, 3162 i.e., mat->rstart <= isrow[i] < mat->rend 3163 iscol - parallel column index set; its local indices are a subset of the local columns of mat, 3164 i.e., mat->cstart <= iscol[i] < mat->cend 3165 Output Parameters: 3166 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3167 iscol_o - sequential column index set for retrieving mat->B 3168 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3169 */ 3170 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3171 { 3172 PetscErrorCode ierr; 3173 Vec x,cmap; 3174 const PetscInt *is_idx; 3175 PetscScalar *xarray,*cmaparray; 3176 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3177 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3178 Mat B=a->B; 3179 Vec lvec=a->lvec,lcmap; 3180 PetscInt i,cstart,cend,Bn=B->cmap->N; 3181 MPI_Comm comm; 3182 VecScatter Mvctx=a->Mvctx; 3183 3184 PetscFunctionBegin; 3185 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3186 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3187 3188 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' 
to form a full vector x */ 3189 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3190 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3191 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3192 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3193 3194 /* Get start indices */ 3195 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3196 isstart -= ncols; 3197 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3198 3199 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3200 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3201 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3202 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3203 for (i=0; i<ncols; i++) { 3204 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3205 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3206 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3207 } 3208 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3209 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3210 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3211 3212 /* Get iscol_d */ 3213 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3214 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3215 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3216 3217 /* Get isrow_d */ 3218 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3219 rstart = mat->rmap->rstart; 3220 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3221 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3222 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3223 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3224 3225 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3226 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3227 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3228 3229 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3230 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3231 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3232 3233 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3234 3235 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3236 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3237 3238 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3239 /* off-process column indices */ 3240 count = 0; 3241 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3242 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3243 3244 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3245 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3246 for (i=0; i<Bn; i++) { 3247 if (PetscRealPart(xarray[i]) > -1.0) { 3248 idx[count] = i; /* local column index in off-diagonal part B */ 3249 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3250 count++; 3251 } 3252 } 3253 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3254 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3255 3256 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3257 /* cannot ensure iscol_o has same blocksize as iscol! 
*/ 3258 3259 ierr = PetscFree(idx);CHKERRQ(ierr); 3260 *garray = cmap1; 3261 3262 ierr = VecDestroy(&x);CHKERRQ(ierr); 3263 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3264 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3265 PetscFunctionReturn(0); 3266 } 3267 3268 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3269 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3270 { 3271 PetscErrorCode ierr; 3272 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3273 Mat M = NULL; 3274 MPI_Comm comm; 3275 IS iscol_d,isrow_d,iscol_o; 3276 Mat Asub = NULL,Bsub = NULL; 3277 PetscInt n; 3278 3279 PetscFunctionBegin; 3280 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3281 3282 if (call == MAT_REUSE_MATRIX) { 3283 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3284 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3285 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3286 3287 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3288 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3289 3290 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3291 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3292 3293 /* Update diagonal and off-diagonal portions of submat */ 3294 asub = (Mat_MPIAIJ*)(*submat)->data; 3295 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3296 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3297 if (n) { 3298 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3299 } 3300 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3301 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3302 3303 } else { /* call == MAT_INITIAL_MATRIX) */ 3304 const PetscInt *garray; 3305 PetscInt BsubN; 3306 3307 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3308 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3309 3310 /* Create local submatrices Asub and Bsub */ 3311 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3312 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3313 3314 /* Create submatrix M */ 3315 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3316 3317 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3318 asub = (Mat_MPIAIJ*)M->data; 3319 3320 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3321 n = asub->B->cmap->N; 3322 if (BsubN > n) { 3323 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3324 const PetscInt *idx; 3325 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3326 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3327 3328 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3329 j = 0; 3330 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3331 for (i=0; i<n; i++) { 3332 if (j >= BsubN) break; 3333 while (subgarray[i] > garray[j]) j++; 3334 3335 if (subgarray[i] == garray[j]) { 3336 idx_new[i] = idx[j++]; 3337 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be smaller than garray[%D]=%D",i,subgarray[i],j,garray[j]); 3338 } 3339 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3340 3341 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3342 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3343 3344 } else if (BsubN < n) { 3345 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%D) cannot be fewer than columns of B (%D)",BsubN,asub->B->cmap->N); 3346 } 3347 3348 ierr = PetscFree(garray);CHKERRQ(ierr); 3349 *submat = M; 3350 3351 /* Save isrow_d, iscol_d and iscol_o used in this process for the next request */ 3352 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3353 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3354 3355 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3356 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3357 3358 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3359 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3360 } 3361 PetscFunctionReturn(0); 3362 } 3363 3364 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3365 { 3366 PetscErrorCode ierr; 3367 IS iscol_local=NULL,isrow_d; 3368 PetscInt csize; 3369 PetscInt n,i,j,start,end; 3370 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3371 MPI_Comm comm; 3372 3373 PetscFunctionBegin; 3374 /* If isrow has same processor distribution as mat, 3375 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3376 if (call == MAT_REUSE_MATRIX) { 3377 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3378 if (isrow_d) { 3379 sameRowDist = PETSC_TRUE; 3380 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3381 } else { 3382 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3383 if (iscol_local) { 3384 sameRowDist = PETSC_TRUE; 3385 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3386 } 3387 } 3388 } else { 3389 /* Check if isrow has same processor distribution as mat */ 3390 sameDist[0] = 
PETSC_FALSE; 3391 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3392 if (!n) { 3393 sameDist[0] = PETSC_TRUE; 3394 } else { 3395 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3396 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3397 if (i >= start && j < end) { 3398 sameDist[0] = PETSC_TRUE; 3399 } 3400 } 3401 3402 /* Check if iscol has same processor distribution as mat */ 3403 sameDist[1] = PETSC_FALSE; 3404 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3405 if (!n) { 3406 sameDist[1] = PETSC_TRUE; 3407 } else { 3408 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3409 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3410 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3411 } 3412 3413 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3414 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3415 sameRowDist = tsameDist[0]; 3416 } 3417 3418 if (sameRowDist) { 3419 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3420 /* isrow and iscol have same processor distribution as mat */ 3421 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3422 PetscFunctionReturn(0); 3423 } else { /* sameRowDist */ 3424 /* isrow has same processor distribution as mat */ 3425 if (call == MAT_INITIAL_MATRIX) { 3426 PetscBool sorted; 3427 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3428 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3429 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3430 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != size of iscol %D",n,i); 3431 3432 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3433 if (sorted) { 3434 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3435 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3436 PetscFunctionReturn(0); 3437 } 3438 } else { /* call == MAT_REUSE_MATRIX */ 3439 IS iscol_sub; 3440 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3441 if (iscol_sub) { 3442 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3443 PetscFunctionReturn(0); 3444 } 3445 } 3446 } 3447 } 3448 3449 /* General case: iscol -> iscol_local which has global size of iscol */ 3450 if (call == MAT_REUSE_MATRIX) { 3451 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3452 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3453 } else { 3454 if (!iscol_local) { 3455 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3456 } 3457 } 3458 3459 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3460 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3461 3462 if (call == MAT_INITIAL_MATRIX) { 3463 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3464 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3465 } 3466 PetscFunctionReturn(0); 3467 } 3468 3469 /*@C 3470 MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3471 and "off-diagonal" parts of the matrix in CSR format. 3472 3473 Collective 3474 3475 Input Parameters: 3476 + comm - MPI communicator 3477 . 
A - "diagonal" portion of matrix 3478 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3479 - garray - global index of B columns 3480 3481 Output Parameter: 3482 . mat - the matrix, with input A as its local diagonal matrix 3483 Level: advanced 3484 3485 Notes: 3486 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3487 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3488 3489 .seealso: MatCreateMPIAIJWithSplitArrays() 3490 @*/ 3491 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3492 { 3493 PetscErrorCode ierr; 3494 Mat_MPIAIJ *maij; 3495 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3496 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3497 PetscScalar *oa=b->a; 3498 Mat Bnew; 3499 PetscInt m,n,N; 3500 3501 PetscFunctionBegin; 3502 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3503 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3504 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3505 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3506 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3507 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3508 3509 /* Get global columns of mat */ 3510 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3511 3512 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3513 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3514 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3515 maij = (Mat_MPIAIJ*)(*mat)->data; 3516 3517 (*mat)->preallocated = PETSC_TRUE; 3518 3519 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3520 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3521 3522 /* Set A as diagonal portion of *mat */ 3523 maij->A = A; 3524 3525 nz = oi[m]; 3526 for (i=0; i<nz; i++) { 3527 col = oj[i]; 3528 oj[i] = garray[col]; 3529 } 3530 3531 /* Set Bnew as off-diagonal portion of *mat */ 3532 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3533 bnew = (Mat_SeqAIJ*)Bnew->data; 3534 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3535 maij->B = Bnew; 3536 3537 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3538 3539 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3540 b->free_a = PETSC_FALSE; 3541 b->free_ij = PETSC_FALSE; 3542 ierr = MatDestroy(&B);CHKERRQ(ierr); 3543 3544 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3545 bnew->free_a = PETSC_TRUE; 3546 bnew->free_ij = PETSC_TRUE; 3547 3548 /* condense columns of maij->B */ 3549 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3550 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3551 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3552 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3553 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3554 PetscFunctionReturn(0); 3555 } 3556 3557 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3558 
3559 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3560 { 3561 PetscErrorCode ierr; 3562 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3563 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3564 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3565 Mat M,Msub,B=a->B; 3566 MatScalar *aa; 3567 Mat_SeqAIJ *aij; 3568 PetscInt *garray = a->garray,*colsub,Ncols; 3569 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3570 IS iscol_sub,iscmap; 3571 const PetscInt *is_idx,*cmap; 3572 PetscBool allcolumns=PETSC_FALSE; 3573 MPI_Comm comm; 3574 3575 PetscFunctionBegin; 3576 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3577 3578 if (call == MAT_REUSE_MATRIX) { 3579 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3580 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3581 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3582 3583 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3584 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3585 3586 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3587 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3588 3589 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3590 3591 } else { /* call == MAT_INITIAL_MATRIX) */ 3592 PetscBool flg; 3593 3594 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3595 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3596 3597 /* (1) iscol -> nonscalable iscol_local */ 3598 /* Check for special case: each processor gets entire matrix columns */ 3599 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3600 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3601 ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3602 if (allcolumns) { 3603 iscol_sub = iscol_local; 3604 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3605 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3606 3607 } else { 3608 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3609 PetscInt *idx,*cmap1,k; 3610 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3611 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3612 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3613 count = 0; 3614 k = 0; 3615 for (i=0; i<Ncols; i++) { 3616 j = is_idx[i]; 3617 if (j >= cstart && j < cend) { 3618 /* diagonal part of mat */ 3619 idx[count] = j; 3620 cmap1[count++] = i; /* column index in submat */ 3621 } else if (Bn) { 3622 /* off-diagonal part of mat */ 3623 if (j == garray[k]) { 3624 idx[count] = j; 3625 cmap1[count++] = i; /* column index in submat */ 3626 } else if (j > garray[k]) { 3627 while (j > garray[k] && k < Bn-1) k++; 3628 if (j == garray[k]) { 3629 idx[count] = j; 3630 cmap1[count++] = i; /* column index in submat */ 3631 } 3632 } 3633 } 3634 } 3635 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3636 3637 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3638 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3639 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3640 3641 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3642 } 3643 3644 /* (3) Create sequential Msub */ 3645 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3646 } 3647 3648 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3649 aij = (Mat_SeqAIJ*)(Msub)->data; 3650 ii = aij->i; 3651 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3652 3653 /* 3654 m - number of local rows 3655 Ncols - number of columns (same on all processors) 3656 rstart - first row in new global matrix generated 3657 */ 3658 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3659 3660 if (call == MAT_INITIAL_MATRIX) { 3661 /* (4) Create parallel newmat */ 3662 PetscMPIInt rank,size; 3663 PetscInt csize; 3664 3665 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3666 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3667 3668 /* 3669 Determine the number of non-zeros in the diagonal and off-diagonal 3670 portions of the matrix in order to do correct preallocation 3671 */ 3672 3673 /* first get start and end of "diagonal" columns */ 3674 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3675 if (csize == PETSC_DECIDE) { 3676 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3677 if (mglobal == Ncols) { /* square matrix */ 3678 nlocal = m; 3679 } else { 3680 nlocal = Ncols/size + ((Ncols % size) > rank); 3681 } 3682 } else { 3683 nlocal = csize; 3684 } 3685 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3686 rstart = rend - nlocal; 3687 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3688 3689 /* next, compute all the lengths */ 3690 jj = aij->j; 3691 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3692 olens = dlens + m; 3693 for (i=0; i<m; i++) { 3694 jend = ii[i+1] - ii[i]; 3695 olen = 0; 3696 dlen = 0; 3697 for (j=0; j<jend; j++) { 3698 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3699 else dlen++; 3700 jj++; 3701 } 3702 olens[i] = olen; 3703 dlens[i] = dlen; 3704 } 3705 3706 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3707 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3708 3709 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3710 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 
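/* set block sizes and type of the parallel submatrix, then preallocate it with the diagonal/off-diagonal row lengths counted above */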
3711 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3712 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3713 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3714 ierr = PetscFree(dlens);CHKERRQ(ierr); 3715 3716 } else { /* call == MAT_REUSE_MATRIX */ 3717 M = *newmat; 3718 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3719 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3720 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3721 /* 3722 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3723 rather than the slower MatSetValues(). 3724 */ 3725 M->was_assembled = PETSC_TRUE; 3726 M->assembled = PETSC_FALSE; 3727 } 3728 3729 /* (5) Set values of Msub to *newmat */ 3730 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3731 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3732 3733 jj = aij->j; 3734 aa = aij->a; 3735 for (i=0; i<m; i++) { 3736 row = rstart + i; 3737 nz = ii[i+1] - ii[i]; 3738 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3739 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3740 jj += nz; aa += nz; 3741 } 3742 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3743 3744 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3745 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3746 3747 ierr = PetscFree(colsub);CHKERRQ(ierr); 3748 3749 /* save Msub, iscol_sub and iscmap used in this process for the next request */ 3750 if (call == MAT_INITIAL_MATRIX) { 3751 *newmat = M; 3752 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3753 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3754 3755 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3756 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3757 3758 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3759 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3760 3761 if (iscol_local) { 3762 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3763 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3764 } 3765 } 3766 PetscFunctionReturn(0); 3767 } 3768 3769 /* 3770 Not great since it makes two copies of the submatrix: first a SeqAIJ 3771 on each process, and then the end result by concatenating the local matrices. 3772 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3773 3774 Note: This requires a sequential iscol with all indices. 
3775 */ 3776 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3777 { 3778 PetscErrorCode ierr; 3779 PetscMPIInt rank,size; 3780 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3781 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3782 Mat M,Mreuse; 3783 MatScalar *aa,*vwork; 3784 MPI_Comm comm; 3785 Mat_SeqAIJ *aij; 3786 PetscBool colflag,allcolumns=PETSC_FALSE; 3787 3788 PetscFunctionBegin; 3789 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3790 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3791 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3792 3793 /* Check for special case: each processor gets entire matrix columns */ 3794 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3795 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3796 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3797 ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3798 3799 if (call == MAT_REUSE_MATRIX) { 3800 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3801 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3802 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3803 } else { 3804 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3805 } 3806 3807 /* 3808 m - number of local rows 3809 n - number of columns (same on all processors) 3810 rstart - first row in new global matrix generated 3811 */ 3812 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3813 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3814 if (call == MAT_INITIAL_MATRIX) { 3815 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3816 ii = aij->i; 3817 jj = aij->j; 3818 3819 /* 3820 Determine the number of non-zeros in the diagonal and off-diagonal 3821 portions of the matrix in order to do correct preallocation 3822 */ 3823 3824 /* first get start and end of "diagonal" columns */ 3825 if (csize == PETSC_DECIDE) { 3826 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3827 if (mglobal == n) { /* square matrix */ 3828 nlocal = m; 3829 } else { 3830 nlocal = n/size + ((n % size) > rank); 3831 } 3832 } else { 3833 nlocal = csize; 3834 } 3835 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3836 rstart = rend - nlocal; 3837 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3838 3839 /* next, compute all the lengths */ 3840 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3841 olens = dlens + m; 3842 for (i=0; i<m; i++) { 3843 jend = ii[i+1] - ii[i]; 3844 olen = 0; 3845 dlen = 0; 3846 for (j=0; j<jend; j++) { 3847 if (*jj < rstart || *jj >= rend) olen++; 3848 else dlen++; 3849 jj++; 3850 } 3851 olens[i] = olen; 3852 dlens[i] = dlen; 3853 } 3854 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3855 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3856 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3857 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3858 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3859 ierr = PetscFree(dlens);CHKERRQ(ierr); 3860 } else { 3861 PetscInt ml,nl; 3862 3863 M = *newmat; 3864 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3865 if (ml 
!= m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3866 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3867 /* 3868 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3869 rather than the slower MatSetValues(). 3870 */ 3871 M->was_assembled = PETSC_TRUE; 3872 M->assembled = PETSC_FALSE; 3873 } 3874 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3875 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3876 ii = aij->i; 3877 jj = aij->j; 3878 aa = aij->a; 3879 for (i=0; i<m; i++) { 3880 row = rstart + i; 3881 nz = ii[i+1] - ii[i]; 3882 cwork = jj; jj += nz; 3883 vwork = aa; aa += nz; 3884 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3885 } 3886 3887 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3888 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3889 *newmat = M; 3890 3891 /* save submatrix used in this process for the next request */ 3892 if (call == MAT_INITIAL_MATRIX) { 3893 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3894 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3895 } 3896 PetscFunctionReturn(0); 3897 } 3898 3899 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3900 { 3901 PetscInt m,cstart, cend,j,nnz,i,d; 3902 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3903 const PetscInt *JJ; 3904 PetscErrorCode ierr; 3905 PetscBool nooffprocentries; 3906 3907 PetscFunctionBegin; 3908 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]); 3909 3910 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3911 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3912 m = B->rmap->n; 3913 cstart = B->cmap->rstart; 3914 cend = B->cmap->rend; 3915 rstart = B->rmap->rstart; 3916 3917 ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3918 3919 if (PetscDefined(USE_DEBUG)) { 3920 for (i=0; i<m; i++) { 3921 nnz = Ii[i+1]- Ii[i]; 3922 JJ = J + Ii[i]; 3923 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz); 3924 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]); 3925 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3926 } 3927 } 3928 3929 for (i=0; i<m; i++) { 3930 nnz = Ii[i+1]- Ii[i]; 3931 JJ = J + Ii[i]; 3932 nnz_max = PetscMax(nnz_max,nnz); 3933 d = 0; 3934 for (j=0; j<nnz; j++) { 3935 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3936 } 3937 d_nnz[i] = d; 3938 o_nnz[i] = nnz - d; 3939 } 3940 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3941 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3942 3943 for (i=0; i<m; i++) { 3944 ii = i + rstart; 3945 ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? 
v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr); 3946 } 3947 nooffprocentries = B->nooffprocentries; 3948 B->nooffprocentries = PETSC_TRUE; 3949 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3950 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3951 B->nooffprocentries = nooffprocentries; 3952 3953 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3954 PetscFunctionReturn(0); 3955 } 3956 3957 /*@ 3958 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3959 (the default parallel PETSc format). 3960 3961 Collective 3962 3963 Input Parameters: 3964 + B - the matrix 3965 . i - the indices into j for the start of each local row (starts with zero) 3966 . j - the column indices for each local row (starts with zero) 3967 - v - optional values in the matrix 3968 3969 Level: developer 3970 3971 Notes: 3972 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3973 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3974 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3975 3976 The i and j indices are 0 based, and the i indices are offsets into the local j array. 3977 3978 The format used for the sparse matrix input is equivalent to a 3979 row-major ordering, i.e., for the following matrix the input data expected is 3980 as shown 3981 3982 $ 1 0 0 3983 $ 2 0 3 P0 3984 $ ------- 3985 $ 4 5 6 P1 3986 $ 3987 $ Process0 [P0]: rows_owned=[0,1] 3988 $ i = {0,1,3} [size = nrow+1 = 2+1] 3989 $ j = {0,0,2} [size = 3] 3990 $ v = {1,2,3} [size = 3] 3991 $ 3992 $ Process1 [P1]: rows_owned=[2] 3993 $ i = {0,3} [size = nrow+1 = 1+1] 3994 $ j = {0,1,2} [size = 3] 3995 $ v = {4,5,6} [size = 3] 3996 3997 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 3998 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3999 @*/ 4000 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 4001 { 4002 PetscErrorCode ierr; 4003 4004 PetscFunctionBegin; 4005 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 4006 PetscFunctionReturn(0); 4007 } 4008 4009 /*@C 4010 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 4011 (the default parallel PETSc format). For good matrix assembly performance 4012 the user should preallocate the matrix storage by setting the parameters 4013 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4014 performance can be increased by more than a factor of 50. 4015 4016 Collective 4017 4018 Input Parameters: 4019 + B - the matrix 4020 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4021 (same value is used for all local rows) 4022 . d_nnz - array containing the number of nonzeros in the various rows of the 4023 DIAGONAL portion of the local submatrix (possibly different for each row) 4024 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 4025 The size of this array is equal to the number of local rows, i.e., 'm'. 4026 For matrices that will be factored, you must leave room for (and set) 4027 the diagonal entry even if it is zero. 4028 . 
o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4029 submatrix (same value is used for all local rows). 4030 - o_nnz - array containing the number of nonzeros in the various rows of the 4031 OFF-DIAGONAL portion of the local submatrix (possibly different for 4032 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 4033 structure. The size of this array is equal to the number 4034 of local rows, i.e., 'm'. 4035 4036 If the *_nnz parameter is given then the *_nz parameter is ignored. 4037 4038 The AIJ format (also called the Yale sparse matrix format or 4039 compressed row storage (CSR)), is fully compatible with standard Fortran 77 4040 storage. The stored row and column indices begin with zero. 4041 See Users-Manual: ch_mat for details. 4042 4043 The parallel matrix is partitioned such that the first m0 rows belong to 4044 process 0, the next m1 rows belong to process 1, the next m2 rows belong 4045 to process 2 etc., where m0,m1,m2... are the input parameter 'm'. 4046 4047 The DIAGONAL portion of the local submatrix of a processor can be defined 4048 as the submatrix which is obtained by extracting the part corresponding to 4049 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 4050 first row that belongs to the processor, r2 is the last row belonging to 4051 this processor, and c1-c2 is the range of indices of the local part of a 4052 vector suitable for applying the matrix to. This is an mxn matrix. In the 4053 common case of a square matrix, the row and column ranges are the same and 4054 the DIAGONAL part is also square. The remaining portion of the local 4055 submatrix (mxN) constitutes the OFF-DIAGONAL portion. 4056 4057 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4058 4059 You can call MatGetInfo() to get information on how effective the preallocation was; 4060 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 4061 You can also run with the option -info and look for messages with the string 4062 malloc in them to see if additional memory allocation was needed. 4063 4064 Example usage: 4065 4066 Consider the following 8x8 matrix with 34 non-zero values, that is 4067 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 4068 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4069 as follows: 4070 4071 .vb 4072 1 2 0 | 0 3 0 | 0 4 4073 Proc0 0 5 6 | 7 0 0 | 8 0 4074 9 0 10 | 11 0 0 | 12 0 4075 ------------------------------------- 4076 13 0 14 | 15 16 17 | 0 0 4077 Proc1 0 18 0 | 19 20 21 | 0 0 4078 0 0 0 | 22 23 0 | 24 0 4079 ------------------------------------- 4080 Proc2 25 26 27 | 0 0 28 | 29 0 4081 30 0 0 | 31 32 33 | 0 34 4082 .ve 4083 4084 This can be represented as a collection of submatrices as: 4085 4086 .vb 4087 A B C 4088 D E F 4089 G H I 4090 .ve 4091 4092 Where the submatrices A,B,C are owned by proc0, D,E,F are 4093 owned by proc1, G,H,I are owned by proc2. 4094 4095 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4096 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4097 The 'M','N' parameters are 8,8, and have the same values on all procs. 4098 4099 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4100 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4101 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4102 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4103 part as SeqAIJ matrices. 
e.g., proc1 will store [E] as one SeqAIJ 4104 matrix, and [DF] as another SeqAIJ matrix. 4105 4106 When d_nz, o_nz parameters are specified, d_nz storage elements are 4107 allocated for every row of the local diagonal submatrix, and o_nz 4108 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4109 One way to choose d_nz and o_nz is to use the max nonzeros per local 4110 row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4111 In this case, the values of d_nz,o_nz are: 4112 .vb 4113 proc0 : d_nz = 2, o_nz = 2 4114 proc1 : d_nz = 3, o_nz = 2 4115 proc2 : d_nz = 1, o_nz = 4 4116 .ve 4117 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4118 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4119 for proc2, i.e., we are using 12+15+10=37 storage locations to store 4120 34 values. 4121 4122 When d_nnz, o_nnz parameters are specified, the storage is specified 4123 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4124 In the above case the values for d_nnz,o_nnz are: 4125 .vb 4126 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4127 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4128 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4129 .ve 4130 Here the space allocated is the sum of all the above values, i.e. 34, and 4131 hence pre-allocation is perfect. 4132 4133 Level: intermediate 4134 4135 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4136 MATMPIAIJ, MatGetInfo(), PetscSplitOwnership() 4137 @*/ 4138 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4139 { 4140 PetscErrorCode ierr; 4141 4142 PetscFunctionBegin; 4143 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4144 PetscValidType(B,1); 4145 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4146 PetscFunctionReturn(0); 4147 } 4148 4149 /*@ 4150 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local 4151 rows in standard CSR format. 4152 4153 Collective 4154 4155 Input Parameters: 4156 + comm - MPI communicator 4157 . m - number of local rows (Cannot be PETSC_DECIDE) 4158 . n - This value should be the same as the local size used in creating the 4159 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4160 calculated if N is given) For square matrices n is almost always m. 4161 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4162 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4163 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4164 . j - column indices 4165 - a - matrix values 4166 4167 Output Parameter: 4168 . mat - the matrix 4169 4170 Level: intermediate 4171 4172 Notes: 4173 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4174 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4175 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4176 4177 The i and j indices are 0 based, and the i indices are offsets into the local j array. 4178 4179 The format used for the sparse matrix input is equivalent to a 4180 row-major ordering, 
i.e., for the following matrix, the input data expected is 4181 as shown 4182 4183 Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays(). 4184 4185 $ 1 0 0 4186 $ 2 0 3 P0 4187 $ ------- 4188 $ 4 5 6 P1 4189 $ 4190 $ Process0 [P0]: rows_owned=[0,1] 4191 $ i = {0,1,3} [size = nrow+1 = 2+1] 4192 $ j = {0,0,2} [size = 3] 4193 $ v = {1,2,3} [size = 3] 4194 $ 4195 $ Process1 [P1]: rows_owned=[2] 4196 $ i = {0,3} [size = nrow+1 = 1+1] 4197 $ j = {0,1,2} [size = 3] 4198 $ v = {4,5,6} [size = 3] 4199 4200 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4201 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4202 @*/ 4203 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4204 { 4205 PetscErrorCode ierr; 4206 4207 PetscFunctionBegin; 4208 if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4209 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4210 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4211 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4212 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4213 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4214 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4215 PetscFunctionReturn(0); 4216 } 4217 4218 /*@ 4219 MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the local 4220 rows in standard CSR format. Only the numerical values are updated; the other arrays must be identical 4221 4222 Collective 4223 4224 Input Parameters: 4225 + mat - the matrix 4226 . m - number of local rows (Cannot be PETSC_DECIDE) 4227 . n - This value should be the same as the local size used in creating the 4228 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4229 calculated if N is given) For square matrices n is almost always m. 4230 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4231 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4232 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4233 . 
J - column indices 4234 - v - matrix values 4235 4236 Level: intermediate 4237 4238 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4239 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4240 @*/ 4241 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 4242 { 4243 PetscErrorCode ierr; 4244 PetscInt cstart,nnz,i,j; 4245 PetscInt *ld; 4246 PetscBool nooffprocentries; 4247 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)mat->data; 4248 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)Aij->A->data, *Ao = (Mat_SeqAIJ*)Aij->B->data; 4249 PetscScalar *ad = Ad->a, *ao = Ao->a; 4250 const PetscInt *Adi = Ad->i; 4251 PetscInt ldi,Iii,md; 4252 4253 PetscFunctionBegin; 4254 if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4255 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4256 if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4257 if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4258 4259 cstart = mat->cmap->rstart; 4260 if (!Aij->ld) { 4261 /* count number of entries below block diagonal */ 4262 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 4263 Aij->ld = ld; 4264 for (i=0; i<m; i++) { 4265 nnz = Ii[i+1]- Ii[i]; 4266 j = 0; 4267 while (j < nnz && J[j] < cstart) {j++;} /* test j < nnz before reading J[j] */ 4268 J += nnz; 4269 ld[i] = j; 4270 } 4271 } else { 4272 ld = Aij->ld; 4273 } 4274 4275 for (i=0; i<m; i++) { 4276 nnz = Ii[i+1]- Ii[i]; 4277 Iii = Ii[i]; 4278 ldi = ld[i]; 4279 md = Adi[i+1]-Adi[i]; 4280 ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr); 4281 ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr); 4282 ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr); 4283 ad += md; 4284 ao += nnz - md; 4285 } 4286 nooffprocentries = mat->nooffprocentries; 4287 mat->nooffprocentries = PETSC_TRUE; 4288 ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr); 4289 ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr); 4290 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 4291 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4292 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4293 mat->nooffprocentries = nooffprocentries; 4294 PetscFunctionReturn(0); 4295 } 4296 4297 /*@C 4298 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4299 (the default parallel PETSc format). For good matrix assembly performance 4300 the user should preallocate the matrix storage by setting the parameters 4301 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4302 performance can be increased by more than a factor of 50. 4303 4304 Collective 4305 4306 Input Parameters: 4307 + comm - MPI communicator 4308 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4309 This value should be the same as the local size used in creating the 4310 y vector for the matrix-vector product y = Ax. 4311 . n - This value should be the same as the local size used in creating the 4312 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4313 calculated if N is given) For square matrices n is almost always m. 4314 . 
M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4315 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4316 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4317 (same value is used for all local rows) 4318 . d_nnz - array containing the number of nonzeros in the various rows of the 4319 DIAGONAL portion of the local submatrix (possibly different for each row) 4320 or NULL, if d_nz is used to specify the nonzero structure. 4321 The size of this array is equal to the number of local rows, i.e., 'm'. 4322 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4323 submatrix (same value is used for all local rows). 4324 - o_nnz - array containing the number of nonzeros in the various rows of the 4325 OFF-DIAGONAL portion of the local submatrix (possibly different for 4326 each row) or NULL, if o_nz is used to specify the nonzero 4327 structure. The size of this array is equal to the number 4328 of local rows, i.e., 'm'. 4329 4330 Output Parameter: 4331 . A - the matrix 4332 4333 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4334 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4335 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4336 4337 Notes: 4338 If the *_nnz parameter is given then the *_nz parameter is ignored. 4339 4340 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4341 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4342 storage requirements for this matrix. 4343 4344 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4345 processor then it must be used on all processors that share the object for 4346 that argument. 4347 4348 The user MUST specify either the local or global matrix dimensions 4349 (possibly both). 4350 4351 The parallel matrix is partitioned across processors such that the 4352 first m0 rows belong to process 0, the next m1 rows belong to 4353 process 1, the next m2 rows belong to process 2 etc., where 4354 m0,m1,m2,.. are the input parameter 'm', i.e., each processor stores 4355 values corresponding to an [m x N] submatrix. 4356 4357 The columns are logically partitioned with the n0 columns belonging 4358 to the 0th partition, the next n1 columns belonging to the next 4359 partition etc., where n0,n1,n2... are the input parameter 'n'. 4360 4361 The DIAGONAL portion of the local submatrix on any given processor 4362 is the submatrix corresponding to the rows and columns m,n 4363 corresponding to the given processor, i.e., the diagonal matrix on 4364 process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1] 4365 etc. The remaining portion of the local submatrix [m x (N-n)] 4366 constitutes the OFF-DIAGONAL portion. The example below better 4367 illustrates this concept. 4368 4369 For a square global matrix we define each processor's diagonal portion 4370 to be its local rows and the corresponding columns (a square submatrix); 4371 each processor's off-diagonal portion encompasses the remainder of the 4372 local matrix (a rectangular submatrix). 4373 4374 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4375 4376 When calling this routine with a single process communicator, a matrix of 4377 type SEQAIJ is returned. 
If a matrix of type MPIAIJ is desired for this 4378 type of communicator, use the construction mechanism 4379 .vb 4380 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4381 .ve 4382 4383 4384 4385 4386 4387 4388 By default, this format uses inodes (identical nodes) when possible. 4389 We search for consecutive rows with the same nonzero structure, thereby 4390 reusing matrix information to achieve increased efficiency. 4391 4392 Options Database Keys: 4393 + -mat_no_inode - Do not use inodes 4394 - -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4395 4396 4397 4398 Example usage: 4399 4400 Consider the following 8x8 matrix with 34 non-zero values, that is 4401 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 4402 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4403 as follows 4404 4405 .vb 4406 1 2 0 | 0 3 0 | 0 4 4407 Proc0 0 5 6 | 7 0 0 | 8 0 4408 9 0 10 | 11 0 0 | 12 0 4409 ------------------------------------- 4410 13 0 14 | 15 16 17 | 0 0 4411 Proc1 0 18 0 | 19 20 21 | 0 0 4412 0 0 0 | 22 23 0 | 24 0 4413 ------------------------------------- 4414 Proc2 25 26 27 | 0 0 28 | 29 0 4415 30 0 0 | 31 32 33 | 0 34 4416 .ve 4417 4418 This can be represented as a collection of submatrices as 4419 4420 .vb 4421 A B C 4422 D E F 4423 G H I 4424 .ve 4425 4426 Where the submatrices A,B,C are owned by proc0, D,E,F are 4427 owned by proc1, G,H,I are owned by proc2. 4428 4429 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4430 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4431 The 'M','N' parameters are 8,8, and have the same values on all procs. 4432 4433 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4434 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4435 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4436 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4437 part as SeqAIJ matrices. e.g., proc1 will store [E] as one SeqAIJ 4438 matrix, and [DF] as another SeqAIJ matrix. 4439 4440 When d_nz, o_nz parameters are specified, d_nz storage elements are 4441 allocated for every row of the local diagonal submatrix, and o_nz 4442 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4443 One way to choose d_nz and o_nz is to use the max nonzeros per local 4444 row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4445 In this case, the values of d_nz,o_nz are 4446 .vb 4447 proc0 : d_nz = 2, o_nz = 2 4448 proc1 : d_nz = 3, o_nz = 2 4449 proc2 : d_nz = 1, o_nz = 4 4450 .ve 4451 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4452 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4453 for proc2, i.e., we are using 12+15+10=37 storage locations to store 4454 34 values. 4455 4456 When d_nnz, o_nnz parameters are specified, the storage is specified 4457 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4458 In the above case the values for d_nnz,o_nnz are 4459 .vb 4460 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4461 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4462 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4463 .ve 4464 Here the space allocated is the sum of all the above values, i.e. 34, and 4465 hence pre-allocation is perfect. 
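   As a further illustration (a sketch only, not part of the original documentation), proc0 of the example above
   could create and fill its share of the matrix as follows; its d_nnz/o_nnz values are the proc0 rows of the table
   above, and the values inserted follow the first row of the figure (every process makes the analogous calls for
   its own rows):

.vb
      Mat         A;
      PetscInt    d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};
      PetscInt    row0[1]  = {0},cols0[4] = {0,1,4,7};
      PetscScalar vals0[4] = {1.0,2.0,3.0,4.0};
      ierr = MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
      ierr = MatSetValues(A,1,row0,4,cols0,vals0,INSERT_VALUES);CHKERRQ(ierr);
      ...                                             insert the remaining locally owned rows the same way
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve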
4466 4467 Level: intermediate 4468 4469 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4470 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4471 @*/ 4472 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4473 { 4474 PetscErrorCode ierr; 4475 PetscMPIInt size; 4476 4477 PetscFunctionBegin; 4478 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4479 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4480 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4481 if (size > 1) { 4482 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4483 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4484 } else { 4485 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4486 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4487 } 4488 PetscFunctionReturn(0); 4489 } 4490 4491 /*@C 4492 MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix 4493 4494 Not collective 4495 4496 Input Parameter: 4497 . A - The MPIAIJ matrix 4498 4499 Output Parameters: 4500 + Ad - The local diagonal block as a SeqAIJ matrix 4501 . Ao - The local off-diagonal block as a SeqAIJ matrix 4502 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix 4503 4504 Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns 4505 in Ad are in [0, Nc), where Nc is the number of local columns. The columns of Ao are in [0, Nco), where Nco is 4506 the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these 4507 local column numbers to global column numbers in the original matrix.
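
   A minimal usage sketch (assuming A is an assembled MATMPIAIJ matrix; error checking omitted)
   that translates a local column index of Ao back to a global column of A:
.vb
      Mat            Ad,Ao;
      const PetscInt *colmap;
      MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);
      /* local column j of Ao corresponds to global column colmap[j] of A */
.ve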
4508 4509 Level: intermediate 4510 4511 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ 4512 @*/ 4513 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4514 { 4515 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4516 PetscBool flg; 4517 PetscErrorCode ierr; 4518 4519 PetscFunctionBegin; 4520 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr); 4521 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4522 if (Ad) *Ad = a->A; 4523 if (Ao) *Ao = a->B; 4524 if (colmap) *colmap = a->garray; 4525 PetscFunctionReturn(0); 4526 } 4527 4528 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4529 { 4530 PetscErrorCode ierr; 4531 PetscInt m,N,i,rstart,nnz,Ii; 4532 PetscInt *indx; 4533 PetscScalar *values; 4534 4535 PetscFunctionBegin; 4536 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4537 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4538 PetscInt *dnz,*onz,sum,bs,cbs; 4539 4540 if (n == PETSC_DECIDE) { 4541 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4542 } 4543 /* Check sum(n) = N */ 4544 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4545 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4546 4547 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4548 rstart -= m; 4549 4550 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4551 for (i=0; i<m; i++) { 4552 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4553 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4554 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4555 } 4556 4557 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4558 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4559 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4560 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4561 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4562 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4563 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4564 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4565 } 4566 4567 /* numeric phase */ 4568 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4569 for (i=0; i<m; i++) { 4570 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4571 Ii = i + rstart; 4572 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4573 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4574 } 4575 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4576 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4577 PetscFunctionReturn(0); 4578 } 4579 4580 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4581 { 4582 PetscErrorCode ierr; 4583 PetscMPIInt rank; 4584 PetscInt m,N,i,rstart,nnz; 4585 size_t len; 4586 const PetscInt *indx; 4587 PetscViewer out; 4588 char *name; 4589 Mat B; 4590 const PetscScalar *values; 4591 4592 PetscFunctionBegin; 4593 ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr); 4594 ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr); 4595 /* Should this be the type of the diagonal block of A? 
*/ 4596 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4597 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4598 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4599 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4600 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4601 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 4602 for (i=0; i<m; i++) { 4603 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4604 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4605 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4606 } 4607 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4608 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4609 4610 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4611 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4612 ierr = PetscMalloc1(len+6,&name);CHKERRQ(ierr); 4613 ierr = PetscSNPrintf(name,len+6,"%s.%d",outfile,rank);CHKERRQ(ierr); 4614 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4615 ierr = PetscFree(name);CHKERRQ(ierr); 4616 ierr = MatView(B,out);CHKERRQ(ierr); 4617 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4618 ierr = MatDestroy(&B);CHKERRQ(ierr); 4619 PetscFunctionReturn(0); 4620 } 4621 4622 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4623 { 4624 PetscErrorCode ierr; 4625 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4626 4627 PetscFunctionBegin; 4628 if (!merge) PetscFunctionReturn(0); 4629 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4630 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4631 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4632 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4633 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4634 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4635 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4636 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4637 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4638 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4639 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4640 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4641 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4642 ierr = PetscFree(merge);CHKERRQ(ierr); 4643 PetscFunctionReturn(0); 4644 } 4645 4646 #include <../src/mat/utils/freespace.h> 4647 #include <petscbt.h> 4648 4649 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4650 { 4651 PetscErrorCode ierr; 4652 MPI_Comm comm; 4653 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4654 PetscMPIInt size,rank,taga,*len_s; 4655 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4656 PetscInt proc,m; 4657 PetscInt **buf_ri,**buf_rj; 4658 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4659 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4660 MPI_Request *s_waits,*r_waits; 4661 MPI_Status *status; 4662 MatScalar *aa=a->a; 4663 MatScalar **abuf_r,*ba_i; 4664 Mat_Merge_SeqsToMPI *merge; 4665 PetscContainer container; 4666 4667 PetscFunctionBegin; 4668 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4669 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4670 4671 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4672 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4673 4674 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4675 if (!container) SETERRQ(PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4676 ierr = 
PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4677 4678 bi = merge->bi; 4679 bj = merge->bj; 4680 buf_ri = merge->buf_ri; 4681 buf_rj = merge->buf_rj; 4682 4683 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4684 owners = merge->rowmap->range; 4685 len_s = merge->len_s; 4686 4687 /* send and recv matrix values */ 4688 /*-----------------------------*/ 4689 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4690 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4691 4692 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4693 for (proc=0,k=0; proc<size; proc++) { 4694 if (!len_s[proc]) continue; 4695 i = owners[proc]; 4696 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4697 k++; 4698 } 4699 4700 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4701 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4702 ierr = PetscFree(status);CHKERRQ(ierr); 4703 4704 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4705 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4706 4707 /* insert mat values of mpimat */ 4708 /*----------------------------*/ 4709 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4710 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4711 4712 for (k=0; k<merge->nrecv; k++) { 4713 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4714 nrows = *(buf_ri_k[k]); 4715 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4716 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4717 } 4718 4719 /* set values of ba */ 4720 m = merge->rowmap->n; 4721 for (i=0; i<m; i++) { 4722 arow = owners[rank] + i; 4723 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4724 bnzi = bi[i+1] - bi[i]; 4725 ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr); 4726 4727 /* add local non-zero vals of this proc's seqmat into ba */ 4728 anzi = ai[arow+1] - ai[arow]; 4729 aj = a->j + ai[arow]; 4730 aa = a->a + ai[arow]; 4731 nextaj = 0; 4732 for (j=0; nextaj<anzi; j++) { 4733 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4734 ba_i[j] += aa[nextaj++]; 4735 } 4736 } 4737 4738 /* add received vals into ba */ 4739 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4740 /* i-th row */ 4741 if (i == *nextrow[k]) { 4742 anzi = *(nextai[k]+1) - *nextai[k]; 4743 aj = buf_rj[k] + *(nextai[k]); 4744 aa = abuf_r[k] + *(nextai[k]); 4745 nextaj = 0; 4746 for (j=0; nextaj<anzi; j++) { 4747 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4748 ba_i[j] += aa[nextaj++]; 4749 } 4750 } 4751 nextrow[k]++; nextai[k]++; 4752 } 4753 } 4754 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4755 } 4756 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4757 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4758 4759 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4760 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4761 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4762 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4763 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4764 PetscFunctionReturn(0); 4765 } 4766 4767 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4768 { 4769 PetscErrorCode ierr; 4770 Mat B_mpi; 4771 Mat_SeqAIJ 
*a=(Mat_SeqAIJ*)seqmat->data; 4772 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4773 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4774 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4775 PetscInt len,proc,*dnz,*onz,bs,cbs; 4776 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4777 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4778 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4779 MPI_Status *status; 4780 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4781 PetscBT lnkbt; 4782 Mat_Merge_SeqsToMPI *merge; 4783 PetscContainer container; 4784 4785 PetscFunctionBegin; 4786 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4787 4788 /* make sure it is a PETSc comm */ 4789 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4790 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4791 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4792 4793 ierr = PetscNew(&merge);CHKERRQ(ierr); 4794 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4795 4796 /* determine row ownership */ 4797 /*---------------------------------------------------------*/ 4798 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4799 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4800 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4801 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4802 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4803 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4804 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4805 4806 m = merge->rowmap->n; 4807 owners = merge->rowmap->range; 4808 4809 /* determine the number of messages to send, their lengths */ 4810 /*---------------------------------------------------------*/ 4811 len_s = merge->len_s; 4812 4813 len = 0; /* length of buf_si[] */ 4814 merge->nsend = 0; 4815 for (proc=0; proc<size; proc++) { 4816 len_si[proc] = 0; 4817 if (proc == rank) { 4818 len_s[proc] = 0; 4819 } else { 4820 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4821 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4822 } 4823 if (len_s[proc]) { 4824 merge->nsend++; 4825 nrows = 0; 4826 for (i=owners[proc]; i<owners[proc+1]; i++) { 4827 if (ai[i+1] > ai[i]) nrows++; 4828 } 4829 len_si[proc] = 2*(nrows+1); 4830 len += len_si[proc]; 4831 } 4832 } 4833 4834 /* determine the number and length of messages to receive for ij-structure */ 4835 /*-------------------------------------------------------------------------*/ 4836 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4837 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4838 4839 /* post the Irecv of j-structure */ 4840 /*-------------------------------*/ 4841 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4842 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4843 4844 /* post the Isend of j-structure */ 4845 /*--------------------------------*/ 4846 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4847 4848 for (proc=0, k=0; proc<size; proc++) { 4849 if (!len_s[proc]) continue; 4850 i = owners[proc]; 4851 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4852 k++; 4853 } 4854 4855 /* receives and sends of j-structure are complete */ 4856 /*------------------------------------------------*/ 
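/* the guards below ensure MPI_Waitall() is called only when this process actually posted receives or sends */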
4857 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4858 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4859 4860 /* send and recv i-structure */ 4861 /*---------------------------*/ 4862 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4863 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4864 4865 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4866 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4867 for (proc=0,k=0; proc<size; proc++) { 4868 if (!len_s[proc]) continue; 4869 /* form outgoing message for i-structure: 4870 buf_si[0]: nrows to be sent 4871 [1:nrows]: row index (global) 4872 [nrows+1:2*nrows+1]: i-structure index 4873 */ 4874 /*-------------------------------------------*/ 4875 nrows = len_si[proc]/2 - 1; 4876 buf_si_i = buf_si + nrows+1; 4877 buf_si[0] = nrows; 4878 buf_si_i[0] = 0; 4879 nrows = 0; 4880 for (i=owners[proc]; i<owners[proc+1]; i++) { 4881 anzi = ai[i+1] - ai[i]; 4882 if (anzi) { 4883 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4884 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4885 nrows++; 4886 } 4887 } 4888 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4889 k++; 4890 buf_si += len_si[proc]; 4891 } 4892 4893 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4894 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4895 4896 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4897 for (i=0; i<merge->nrecv; i++) { 4898 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4899 } 4900 4901 ierr = PetscFree(len_si);CHKERRQ(ierr); 4902 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4903 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4904 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4905 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4906 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4907 ierr = PetscFree(status);CHKERRQ(ierr); 4908 4909 /* compute a local seq matrix in each processor */ 4910 /*----------------------------------------------*/ 4911 /* allocate bi array and free space for accumulating nonzero column info */ 4912 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4913 bi[0] = 0; 4914 4915 /* create and initialize a linked list */ 4916 nlnk = N+1; 4917 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4918 4919 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4920 len = ai[owners[rank+1]] - ai[owners[rank]]; 4921 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4922 4923 current_space = free_space; 4924 4925 /* determine symbolic info for each local row */ 4926 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4927 4928 for (k=0; k<merge->nrecv; k++) { 4929 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4930 nrows = *buf_ri_k[k]; 4931 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4932 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4933 } 4934 4935 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4936 len = 0; 4937 for (i=0; i<m; i++) { 4938 bnzi = 0; 4939 /* add local non-zero cols of this proc's seqmat into lnk */ 4940 arow = owners[rank] + i; 4941 anzi = 
ai[arow+1] - ai[arow]; 4942 aj = a->j + ai[arow]; 4943 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4944 bnzi += nlnk; 4945 /* add received col data into lnk */ 4946 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4947 if (i == *nextrow[k]) { /* i-th row */ 4948 anzi = *(nextai[k]+1) - *nextai[k]; 4949 aj = buf_rj[k] + *nextai[k]; 4950 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4951 bnzi += nlnk; 4952 nextrow[k]++; nextai[k]++; 4953 } 4954 } 4955 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4956 4957 /* if free space is not available, make more free space */ 4958 if (current_space->local_remaining<bnzi) { 4959 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr); 4960 nspacedouble++; 4961 } 4962 /* copy data into free space, then initialize lnk */ 4963 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4964 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4965 4966 current_space->array += bnzi; 4967 current_space->local_used += bnzi; 4968 current_space->local_remaining -= bnzi; 4969 4970 bi[i+1] = bi[i] + bnzi; 4971 } 4972 4973 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4974 4975 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4976 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4977 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4978 4979 /* create symbolic parallel matrix B_mpi */ 4980 /*---------------------------------------*/ 4981 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4982 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4983 if (n==PETSC_DECIDE) { 4984 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4985 } else { 4986 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4987 } 4988 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4989 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4990 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4991 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4992 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4993 4994 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4995 B_mpi->assembled = PETSC_FALSE; 4996 merge->bi = bi; 4997 merge->bj = bj; 4998 merge->buf_ri = buf_ri; 4999 merge->buf_rj = buf_rj; 5000 merge->coi = NULL; 5001 merge->coj = NULL; 5002 merge->owners_co = NULL; 5003 5004 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 5005 5006 /* attach the supporting struct to B_mpi for reuse */ 5007 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 5008 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 5009 ierr = PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI);CHKERRQ(ierr); 5010 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 5011 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 5012 *mpimat = B_mpi; 5013 5014 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 5015 PetscFunctionReturn(0); 5016 } 5017 5018 /*@C 5019 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 5020 matrices from each processor 5021 5022 Collective 5023 5024 Input Parameters: 5025 + comm - the communicator the parallel matrix will live on 5026 . seqmat - the input sequential matrix 5027 . m - number of local rows (or PETSC_DECIDE) 5028 .
n - number of local columns (or PETSC_DECIDE) 5029 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5030 5031 Output Parameter: 5032 . mpimat - the parallel matrix generated 5033 5034 Level: advanced 5035 5036 Notes: 5037 The dimensions of the sequential matrix in each processor MUST be the same. 5038 The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be 5039 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 5040 @*/ 5041 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 5042 { 5043 PetscErrorCode ierr; 5044 PetscMPIInt size; 5045 5046 PetscFunctionBegin; 5047 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5048 if (size == 1) { 5049 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5050 if (scall == MAT_INITIAL_MATRIX) { 5051 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 5052 } else { 5053 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5054 } 5055 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5056 PetscFunctionReturn(0); 5057 } 5058 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5059 if (scall == MAT_INITIAL_MATRIX) { 5060 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 5061 } 5062 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 5063 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5064 PetscFunctionReturn(0); 5065 } 5066 5067 /*@ 5068 MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5069 mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 5070 with MatGetSize(). 5071 5072 Not Collective 5073 5074 Input Parameters: 5075 + A - the matrix 5076 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5077 5078 Output Parameter: 5079 . A_loc - the local sequential matrix generated 5080 5081 Level: developer 5082 5083 Notes: 5084 When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A. 5085 If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called. 5086 This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely 5087 modify the values of the returned A_loc.
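
   A typical calling sequence, sketched under the assumption that A is an assembled parallel matrix
   (error checking omitted):
.vb
      Mat A_loc;
      MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);   /* build the mlocal x N local matrix */
      /* ... change entries of A, keeping the same nonzero pattern ... */
      MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);     /* refresh the values in A_loc */
      MatDestroy(&A_loc);
.ve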
5088 5089 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed() 5090 5091 @*/ 5092 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5093 { 5094 PetscErrorCode ierr; 5095 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5096 Mat_SeqAIJ *mat,*a,*b; 5097 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 5098 MatScalar *aa,*ba,*cam; 5099 PetscScalar *ca; 5100 PetscMPIInt size; 5101 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5102 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5103 PetscBool match; 5104 5105 PetscFunctionBegin; 5106 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr); 5107 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5108 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 5109 if (size == 1) { 5110 if (scall == MAT_INITIAL_MATRIX) { 5111 ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr); 5112 *A_loc = mpimat->A; 5113 } else if (scall == MAT_REUSE_MATRIX) { 5114 ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5115 } 5116 PetscFunctionReturn(0); 5117 } 5118 5119 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5120 a = (Mat_SeqAIJ*)(mpimat->A)->data; 5121 b = (Mat_SeqAIJ*)(mpimat->B)->data; 5122 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 5123 aa = a->a; ba = b->a; 5124 if (scall == MAT_INITIAL_MATRIX) { 5125 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 5126 ci[0] = 0; 5127 for (i=0; i<am; i++) { 5128 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5129 } 5130 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 5131 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 5132 k = 0; 5133 for (i=0; i<am; i++) { 5134 ncols_o = bi[i+1] - bi[i]; 5135 ncols_d = ai[i+1] - ai[i]; 5136 /* off-diagonal portion of A */ 5137 for (jo=0; jo<ncols_o; jo++) { 5138 col = cmap[*bj]; 5139 if (col >= cstart) break; 5140 cj[k] = col; bj++; 5141 ca[k++] = *ba++; 5142 } 5143 /* diagonal portion of A */ 5144 for (j=0; j<ncols_d; j++) { 5145 cj[k] = cstart + *aj++; 5146 ca[k++] = *aa++; 5147 } 5148 /* off-diagonal portion of A */ 5149 for (j=jo; j<ncols_o; j++) { 5150 cj[k] = cmap[*bj++]; 5151 ca[k++] = *ba++; 5152 } 5153 } 5154 /* put together the new matrix */ 5155 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5156 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5157 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5158 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5159 mat->free_a = PETSC_TRUE; 5160 mat->free_ij = PETSC_TRUE; 5161 mat->nonew = 0; 5162 } else if (scall == MAT_REUSE_MATRIX) { 5163 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5164 ci = mat->i; cj = mat->j; cam = mat->a; 5165 for (i=0; i<am; i++) { 5166 /* off-diagonal portion of A */ 5167 ncols_o = bi[i+1] - bi[i]; 5168 for (jo=0; jo<ncols_o; jo++) { 5169 col = cmap[*bj]; 5170 if (col >= cstart) break; 5171 *cam++ = *ba++; bj++; 5172 } 5173 /* diagonal portion of A */ 5174 ncols_d = ai[i+1] - ai[i]; 5175 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5176 /* off-diagonal portion of A */ 5177 for (j=jo; j<ncols_o; j++) { 5178 *cam++ = *ba++; bj++; 5179 } 5180 } 5181 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5182 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5183 PetscFunctionReturn(0); 5184 } 5185 5186 /*@C 5187 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5188 5189 Not Collective 5190 5191 Input Parameters: 5192 + A - the matrix 5193 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5194 - row, col - index sets of rows and columns to extract (or NULL) 5195 5196 Output Parameter: 5197 . A_loc - the local sequential matrix generated 5198 5199 Level: developer 5200 5201 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5202 5203 @*/ 5204 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5205 { 5206 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5207 PetscErrorCode ierr; 5208 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5209 IS isrowa,iscola; 5210 Mat *aloc; 5211 PetscBool match; 5212 5213 PetscFunctionBegin; 5214 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5215 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5216 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5217 if (!row) { 5218 start = A->rmap->rstart; end = A->rmap->rend; 5219 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5220 } else { 5221 isrowa = *row; 5222 } 5223 if (!col) { 5224 start = A->cmap->rstart; 5225 cmap = a->garray; 5226 nzA = a->A->cmap->n; 5227 nzB = a->B->cmap->n; 5228 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5229 ncols = 0; 5230 for (i=0; i<nzB; i++) { 5231 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5232 else break; 5233 } 5234 imark = i; 5235 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5236 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5237 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5238 } else { 5239 iscola = *col; 5240 } 5241 if (scall != MAT_INITIAL_MATRIX) { 5242 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5243 aloc[0] = *A_loc; 5244 } 5245 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5246 if (!col) { /* attach global id of condensed columns */ 5247 ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr); 5248 } 5249 *A_loc = aloc[0]; 5250 ierr = PetscFree(aloc);CHKERRQ(ierr); 5251 if (!row) { 5252 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5253 } 5254 if (!col) { 5255 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5256 } 5257 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5258 PetscFunctionReturn(0); 5259 } 5260 5261 /* 5262 * Create a sequential AIJ matrix 
based on row indices. a whole column is extracted once a row is matched. 5263 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5264 * on a global size. 5265 * */ 5266 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth) 5267 { 5268 Mat_MPIAIJ *p=(Mat_MPIAIJ*)P->data; 5269 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth; 5270 PetscInt plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol; 5271 PetscMPIInt owner; 5272 PetscSFNode *iremote,*oiremote; 5273 const PetscInt *lrowindices; 5274 PetscErrorCode ierr; 5275 PetscSF sf,osf; 5276 PetscInt pcstart,*roffsets,*loffsets,*pnnz,j; 5277 PetscInt ontotalcols,dntotalcols,ntotalcols,nout; 5278 MPI_Comm comm; 5279 ISLocalToGlobalMapping mapping; 5280 5281 PetscFunctionBegin; 5282 ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr); 5283 /* plocalsize is the number of roots 5284 * nrows is the number of leaves 5285 * */ 5286 ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr); 5287 ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr); 5288 ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr); 5289 ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr); 5290 for (i=0;i<nrows;i++) { 5291 /* Find a remote index and an owner for a row 5292 * The row could be local or remote 5293 * */ 5294 owner = 0; 5295 lidx = 0; 5296 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr); 5297 iremote[i].index = lidx; 5298 iremote[i].rank = owner; 5299 } 5300 /* Create SF to communicate how many nonzero columns for each row */ 5301 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5302 /* SF will figure out the number of nonzero colunms for each row, and their 5303 * offsets 5304 * */ 5305 ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5306 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5307 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5308 5309 ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr); 5310 ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr); 5311 ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr); 5312 roffsets[0] = 0; 5313 roffsets[1] = 0; 5314 for (i=0;i<plocalsize;i++) { 5315 /* diag */ 5316 nrcols[i*2+0] = pd->i[i+1] - pd->i[i]; 5317 /* off diag */ 5318 nrcols[i*2+1] = po->i[i+1] - po->i[i]; 5319 /* compute offsets so that we relative location for each row */ 5320 roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0]; 5321 roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1]; 5322 } 5323 ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr); 5324 ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr); 5325 /* 'r' means root, and 'l' means leaf */ 5326 ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr); 5327 ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr); 5328 ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr); 5329 ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr); 5330 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5331 ierr = PetscFree(roffsets);CHKERRQ(ierr); 5332 ierr = PetscFree(nrcols);CHKERRQ(ierr); 5333 dntotalcols = 0; 5334 ontotalcols = 0; 5335 ncol = 0; 5336 for (i=0;i<nrows;i++) { 5337 pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1]; 5338 ncol = PetscMax(pnnz[i],ncol); 5339 /* diag */ 5340 dntotalcols += nlcols[i*2+0]; 5341 /* off diag */ 5342 ontotalcols += nlcols[i*2+1]; 5343 } 5344 /* We do not need to figure the right number of columns 5345 * since all the 
calculations will be done by going through the raw data 5346 * */ 5347 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr); 5348 ierr = MatSetUp(*P_oth);CHKERRQ(ierr); 5349 ierr = PetscFree(pnnz);CHKERRQ(ierr); 5350 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5351 /* diag */ 5352 ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr); 5353 /* off diag */ 5354 ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr); 5355 /* diag */ 5356 ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr); 5357 /* off diag */ 5358 ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr); 5359 dntotalcols = 0; 5360 ontotalcols = 0; 5361 ntotalcols = 0; 5362 for (i=0;i<nrows;i++) { 5363 owner = 0; 5364 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr); 5365 /* Set iremote for diag matrix */ 5366 for (j=0;j<nlcols[i*2+0];j++) { 5367 iremote[dntotalcols].index = loffsets[i*2+0] + j; 5368 iremote[dntotalcols].rank = owner; 5369 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5370 ilocal[dntotalcols++] = ntotalcols++; 5371 } 5372 /* off diag */ 5373 for (j=0;j<nlcols[i*2+1];j++) { 5374 oiremote[ontotalcols].index = loffsets[i*2+1] + j; 5375 oiremote[ontotalcols].rank = owner; 5376 oilocal[ontotalcols++] = ntotalcols++; 5377 } 5378 } 5379 ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr); 5380 ierr = PetscFree(loffsets);CHKERRQ(ierr); 5381 ierr = PetscFree(nlcols);CHKERRQ(ierr); 5382 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5383 /* P serves as roots and P_oth is leaves 5384 * Diag matrix 5385 * */ 5386 ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5387 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5388 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5389 5390 ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr); 5391 /* Off diag */ 5392 ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5393 ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr); 5394 ierr = PetscSFSetUp(osf);CHKERRQ(ierr); 5395 /* We operate on the matrix internal data for saving memory */ 5396 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5397 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5398 ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr); 5399 /* Convert to global indices for diag matrix */ 5400 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart; 5401 ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr); 5402 /* We want P_oth store global indices */ 5403 ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr); 5404 /* Use memory scalable approach */ 5405 ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr); 5406 ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr); 5407 ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr); 5408 ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr); 5409 /* Convert back to local indices */ 5410 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart; 5411 ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr); 5412 nout = 0; 5413 ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr); 5414 if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal to nout %D 
\n",po->i[plocalsize],nout); 5415 ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr); 5416 /* Exchange values */ 5417 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5418 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5419 /* Stop PETSc from shrinking memory */ 5420 for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i]; 5421 ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5422 ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5423 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5424 ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr); 5425 ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr); 5426 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5427 ierr = PetscSFDestroy(&osf);CHKERRQ(ierr); 5428 PetscFunctionReturn(0); 5429 } 5430 5431 /* 5432 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5433 * This supports MPIAIJ and MAIJ 5434 * */ 5435 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth) 5436 { 5437 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data; 5438 Mat_SeqAIJ *p_oth; 5439 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data; 5440 IS rows,map; 5441 PetscHMapI hamp; 5442 PetscInt i,htsize,*rowindices,off,*mapping,key,count; 5443 MPI_Comm comm; 5444 PetscSF sf,osf; 5445 PetscBool has; 5446 PetscErrorCode ierr; 5447 5448 PetscFunctionBegin; 5449 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5450 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5451 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5452 * and then create a submatrix (that often is an overlapping matrix) 5453 * */ 5454 if (reuse == MAT_INITIAL_MATRIX) { 5455 /* Use a hash table to figure out unique keys */ 5456 ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr); 5457 ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr); 5458 ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr); 5459 count = 0; 5460 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5461 for (i=0;i<a->B->cmap->n;i++) { 5462 key = a->garray[i]/dof; 5463 ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr); 5464 if (!has) { 5465 mapping[i] = count; 5466 ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr); 5467 } else { 5468 /* Current 'i' has the same value the previous step */ 5469 mapping[i] = count-1; 5470 } 5471 } 5472 ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr); 5473 ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr); 5474 if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count);CHKERRQ(ierr); 5475 ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr); 5476 off = 0; 5477 ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr); 5478 ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr); 5479 ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr); 5480 ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr); 5481 /* In case, the matrix was already created but users want to recreate the matrix */ 5482 ierr = MatDestroy(P_oth);CHKERRQ(ierr); 5483 ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr); 5484 ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr); 5485 ierr = 
ISDestroy(&map);CHKERRQ(ierr); 5486 ierr = ISDestroy(&rows);CHKERRQ(ierr); 5487 } else if (reuse == MAT_REUSE_MATRIX) { 5488 /* If matrix was already created, we simply update values using SF objects 5489 * that as attached to the matrix ealier. 5490 * */ 5491 ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr); 5492 ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr); 5493 if (!sf || !osf) SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet"); 5494 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5495 /* Update values in place */ 5496 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5497 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5498 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5499 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5500 } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type"); 5501 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5502 PetscFunctionReturn(0); 5503 } 5504 5505 /*@C 5506 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5507 5508 Collective on Mat 5509 5510 Input Parameters: 5511 + A,B - the matrices in mpiaij format 5512 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5513 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5514 5515 Output Parameter: 5516 + rowb, colb - index sets of rows and columns of B to extract 5517 - B_seq - the sequential matrix generated 5518 5519 Level: developer 5520 5521 @*/ 5522 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5523 { 5524 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5525 PetscErrorCode ierr; 5526 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5527 IS isrowb,iscolb; 5528 Mat *bseq=NULL; 5529 5530 PetscFunctionBegin; 5531 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5532 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5533 } 5534 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5535 5536 if (scall == MAT_INITIAL_MATRIX) { 5537 start = A->cmap->rstart; 5538 cmap = a->garray; 5539 nzA = a->A->cmap->n; 5540 nzB = a->B->cmap->n; 5541 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5542 ncols = 0; 5543 for (i=0; i<nzB; i++) { /* row < local row index */ 5544 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5545 else break; 5546 } 5547 imark = i; 5548 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5549 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5550 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5551 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5552 } else { 5553 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5554 isrowb = *rowb; iscolb = *colb; 5555 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5556 bseq[0] = *B_seq; 5557 } 5558 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5559 *B_seq = bseq[0]; 5560 ierr = PetscFree(bseq);CHKERRQ(ierr); 5561 if (!rowb) { 5562 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5563 } else { 5564 *rowb = isrowb; 5565 } 5566 if (!colb) { 5567 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5568 } 
else { 5569 *colb = iscolb; 5570 } 5571 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5572 PetscFunctionReturn(0); 5573 } 5574 5575 /* 5576 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5577 of the OFF-DIAGONAL portion of local A 5578 5579 Collective on Mat 5580 5581 Input Parameters: 5582 + A,B - the matrices in mpiaij format 5583 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5584 5585 Output Parameter: 5586 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5587 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5588 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5589 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5590 5591 Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product 5592 for this matrix. This is not desirable.. 5593 5594 Level: developer 5595 5596 */ 5597 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5598 { 5599 PetscErrorCode ierr; 5600 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5601 Mat_SeqAIJ *b_oth; 5602 VecScatter ctx; 5603 MPI_Comm comm; 5604 const PetscMPIInt *rprocs,*sprocs; 5605 const PetscInt *srow,*rstarts,*sstarts; 5606 PetscInt *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5607 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len; 5608 PetscScalar *b_otha,*bufa,*bufA,*vals = NULL; 5609 MPI_Request *rwaits = NULL,*swaits = NULL; 5610 MPI_Status rstatus; 5611 PetscMPIInt jj,size,tag,rank,nsends_mpi,nrecvs_mpi; 5612 5613 PetscFunctionBegin; 5614 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5615 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5616 5617 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5618 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5619 } 5620 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5621 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5622 5623 if (size == 1) { 5624 startsj_s = NULL; 5625 bufa_ptr = NULL; 5626 *B_oth = NULL; 5627 PetscFunctionReturn(0); 5628 } 5629 5630 ctx = a->Mvctx; 5631 tag = ((PetscObject)ctx)->tag; 5632 5633 if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use"); 5634 ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5635 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5636 ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr); 5637 ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr); 5638 ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr); 5639 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5640 5641 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5642 if (scall == MAT_INITIAL_MATRIX) { 5643 /* i-array */ 5644 /*---------*/ 5645 /* post receives */ 5646 if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be 
NULL when nrecvs=0 */ 5647 for (i=0; i<nrecvs; i++) { 5648 rowlen = rvalues + rstarts[i]*rbs; 5649 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5650 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5651 } 5652 5653 /* pack the outgoing message */ 5654 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5655 5656 sstartsj[0] = 0; 5657 rstartsj[0] = 0; 5658 len = 0; /* total length of j or a array to be sent */ 5659 if (nsends) { 5660 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5661 ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr); 5662 } 5663 for (i=0; i<nsends; i++) { 5664 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5665 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5666 for (j=0; j<nrows; j++) { 5667 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5668 for (l=0; l<sbs; l++) { 5669 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5670 5671 rowlen[j*sbs+l] = ncols; 5672 5673 len += ncols; 5674 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5675 } 5676 k++; 5677 } 5678 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5679 5680 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5681 } 5682 /* recvs and sends of i-array are completed */ 5683 i = nrecvs; 5684 while (i--) { 5685 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5686 } 5687 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5688 ierr = PetscFree(svalues);CHKERRQ(ierr); 5689 5690 /* allocate buffers for sending j and a arrays */ 5691 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5692 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5693 5694 /* create i-array of B_oth */ 5695 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5696 5697 b_othi[0] = 0; 5698 len = 0; /* total length of j or a array to be received */ 5699 k = 0; 5700 for (i=0; i<nrecvs; i++) { 5701 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 5702 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 5703 for (j=0; j<nrows; j++) { 5704 b_othi[k+1] = b_othi[k] + rowlen[j]; 5705 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5706 k++; 5707 } 5708 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5709 } 5710 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5711 5712 /* allocate space for j and a arrrays of B_oth */ 5713 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5714 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5715 5716 /* j-array */ 5717 /*---------*/ 5718 /* post receives of j-array */ 5719 for (i=0; i<nrecvs; i++) { 5720 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5721 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5722 } 5723 5724 /* pack the outgoing message j-array */ 5725 if (nsends) k = sstarts[0]; 5726 for (i=0; i<nsends; i++) { 5727 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5728 bufJ = bufj+sstartsj[i]; 5729 for (j=0; j<nrows; j++) { 5730 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5731 for (ll=0; ll<sbs; ll++) { 5732 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5733 for (l=0; l<ncols; l++) { 5734 *bufJ++ = cols[l]; 5735 } 5736 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 
5737 } 5738 } 5739 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5740 } 5741 5742 /* recvs and sends of j-array are completed */ 5743 i = nrecvs; 5744 while (i--) { 5745 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5746 } 5747 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5748 } else if (scall == MAT_REUSE_MATRIX) { 5749 sstartsj = *startsj_s; 5750 rstartsj = *startsj_r; 5751 bufa = *bufa_ptr; 5752 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5753 b_otha = b_oth->a; 5754 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5755 5756 /* a-array */ 5757 /*---------*/ 5758 /* post receives of a-array */ 5759 for (i=0; i<nrecvs; i++) { 5760 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5761 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5762 } 5763 5764 /* pack the outgoing message a-array */ 5765 if (nsends) k = sstarts[0]; 5766 for (i=0; i<nsends; i++) { 5767 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5768 bufA = bufa+sstartsj[i]; 5769 for (j=0; j<nrows; j++) { 5770 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5771 for (ll=0; ll<sbs; ll++) { 5772 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5773 for (l=0; l<ncols; l++) { 5774 *bufA++ = vals[l]; 5775 } 5776 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5777 } 5778 } 5779 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5780 } 5781 /* recvs and sends of a-array are completed */ 5782 i = nrecvs; 5783 while (i--) { 5784 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5785 } 5786 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5787 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5788 5789 if (scall == MAT_INITIAL_MATRIX) { 5790 /* put together the new matrix */ 5791 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5792 5793 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5794 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5795 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5796 b_oth->free_a = PETSC_TRUE; 5797 b_oth->free_ij = PETSC_TRUE; 5798 b_oth->nonew = 0; 5799 5800 ierr = PetscFree(bufj);CHKERRQ(ierr); 5801 if (!startsj_s || !bufa_ptr) { 5802 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5803 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5804 } else { 5805 *startsj_s = sstartsj; 5806 *startsj_r = rstartsj; 5807 *bufa_ptr = bufa; 5808 } 5809 } 5810 5811 ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5812 ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr); 5813 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5814 PetscFunctionReturn(0); 5815 } 5816 5817 /*@C 5818 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5819 5820 Not Collective 5821 5822 Input Parameters: 5823 . A - The matrix in mpiaij format 5824 5825 Output Parameter: 5826 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5827 . 
colmap - A map from global column index to local index into lvec 5828 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5829 5830 Level: developer 5831 5832 @*/ 5833 #if defined(PETSC_USE_CTABLE) 5834 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5835 #else 5836 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5837 #endif 5838 { 5839 Mat_MPIAIJ *a; 5840 5841 PetscFunctionBegin; 5842 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5843 PetscValidPointer(lvec, 2); 5844 PetscValidPointer(colmap, 3); 5845 PetscValidPointer(multScatter, 4); 5846 a = (Mat_MPIAIJ*) A->data; 5847 if (lvec) *lvec = a->lvec; 5848 if (colmap) *colmap = a->colmap; 5849 if (multScatter) *multScatter = a->Mvctx; 5850 PetscFunctionReturn(0); 5851 } 5852 5853 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5854 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5855 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*); 5856 #if defined(PETSC_HAVE_MKL_SPARSE) 5857 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5858 #endif 5859 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*); 5860 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5861 #if defined(PETSC_HAVE_ELEMENTAL) 5862 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5863 #endif 5864 #if defined(PETSC_HAVE_SCALAPACK) 5865 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*); 5866 #endif 5867 #if defined(PETSC_HAVE_HYPRE) 5868 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5869 #endif 5870 #if defined(PETSC_HAVE_CUDA) 5871 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*); 5872 #endif 5873 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5874 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 5875 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 5876 5877 /* 5878 Computes (B'*A')' since computing B*A directly is untenable 5879 5880 n p p 5881 [ ] [ ] [ ] 5882 m [ A ] * n [ B ] = m [ C ] 5883 [ ] [ ] [ ] 5884 5885 */ 5886 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5887 { 5888 PetscErrorCode ierr; 5889 Mat At,Bt,Ct; 5890 5891 PetscFunctionBegin; 5892 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5893 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5894 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct);CHKERRQ(ierr); 5895 ierr = MatDestroy(&At);CHKERRQ(ierr); 5896 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5897 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5898 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5899 PetscFunctionReturn(0); 5900 } 5901 5902 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C) 5903 { 5904 PetscErrorCode ierr; 5905 PetscBool cisdense; 5906 5907 PetscFunctionBegin; 5908 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5909 ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);CHKERRQ(ierr); 5910 ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr); 5911 ierr = 
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
#if defined(PETSC_HAVE_MKL_SPARSE)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
#endif
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
#endif
#if defined(PETSC_HAVE_HYPRE)
PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
#endif
#if defined(PETSC_HAVE_CUDA)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*);
#endif
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);

/*
    Computes (B'*A')' since computing B*A directly is untenable

               n                       p                          p
        [             ]       [     ]         [     ]
      m [      A      ]  *  n [  B  ]   =   m [  C  ]
        [             ]       [     ]         [     ]

*/
static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat            At,Bt,Ct;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
  ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
  ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct);CHKERRQ(ierr);
  ierr = MatDestroy(&At);CHKERRQ(ierr);
  ierr = MatDestroy(&Bt);CHKERRQ(ierr);
  ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(&Ct);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscErrorCode ierr;
  PetscBool      cisdense;

  PetscFunctionBegin;
  if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
  ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
  if (!cisdense) {
    ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
  }
  ierr = MatSetUp(C);CHKERRQ(ierr);

  C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/
static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
{
  Mat_Product *product = C->product;
  Mat         A = product->A,B = product->B;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);

  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
{
  PetscErrorCode ierr;
  Mat_Product    *product = C->product;

  PetscFunctionBegin;
  if (product->type == MATPRODUCT_AB) {
    ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
/* ----------------------------------------------------------------*/
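/*
   Illustrative sketch, not part of the library: the MPIDense*MPIAIJ product implemented above is
   reached through the ordinary MatMatMult() interface. Assuming A is an assembled MATMPIDENSE
   matrix and B an assembled MATMPIAIJ matrix with conforming sizes (names hypothetical):

     Mat            C;
     PetscErrorCode ierr;

     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatDestroy(&C);CHKERRQ(ierr);

   The numeric phase forms C = (B'*A')' with the explicit transposes shown above, since a direct
   dense-times-sparse product is not implemented for this pairing.
*/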
/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

   Level: beginner

   Notes:
    MatSetValues() may be called for this matrix type with a NULL argument for the numerical values;
    in this case the values associated with the rows and columns one passes in are set to zero
    in the matrix.

    MatSetOption(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored.

.seealso: MatCreateAIJ()
M*/
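/*
   Illustrative sketch, not part of the library: the usual way to obtain a MATMPIAIJ matrix is the
   generic creation sequence; MatSetFromOptions() together with -mat_type mpiaij would work in
   place of the explicit MatSetType() call. Sizes and preallocation numbers below are hypothetical.

     Mat            A;
     PetscErrorCode ierr;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
     ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ...
     ierr = MatDestroy(&A);CHKERRQ(ierr);
*/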
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);

  ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);

  /* build cache for off-processor entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSP/CUSPARSE classes */
  b->spptr = NULL;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_MKL_SPARSE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
     MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
     and "off-diagonal" part of the matrix in CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       it calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.  j - column indices
.  a - matrix values
.  oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
.  oj - column indices
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
       The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
       must free the arrays once the matrix has been destroyed and not before.

       The i and j indices are 0 based

       See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix

       This sets local rows and cannot be used to set off-processor values.

       Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
       legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
       not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
       the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
       keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
       communication if it is known that only local entries will be set.

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
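/*
   Illustrative sketch, not part of the library: a use of MatCreateMPIAIJWithSplitArrays() on
   exactly two MPI ranks, each owning one row and one column. The diagonal block holds a single
   entry at local column 0 and the off-diagonal block a single entry at the other rank's global
   column; all names and values are hypothetical. As the docstring above notes, the arrays must
   outlive the matrix.

     Mat            A;
     PetscInt       i[2]  = {0,1},j[1]  = {0};
     PetscInt       oi[2] = {0,1},oj[1] = {0};
     PetscScalar    a[1]  = {2.0},oa[1] = {-1.0};
     PetscMPIInt    rank;
     PetscErrorCode ierr;

     ierr  = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
     oj[0] = rank ? 0 : 1;
     ierr  = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
     ierr  = MatDestroy(&A);CHKERRQ(ierr);
*/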
/*
    Special version for direct calls from Fortran
*/
#include <petsc/private/fortranimpl.h>

/* Change these macros so they can be used in a void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ3
#define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)

#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  {
    PetscInt  i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some variables required by the macros below */
    Mat        A                 = aij->A;
    Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa               = a->a;
    PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B                 = aij->B;
    Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba               = b->a;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
      if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
            if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
          } else if (in[j] < 0) continue;
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
            SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
          } else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined(PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
                ba    = b->a;
                inserted = PETSC_FALSE;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
            if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
          }
        }
      } else if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}