#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/vecscatterimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes:
    Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when
    enough exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL
M*/

/* Pin (or unpin) both the diagonal (a->A) and off-diagonal (a->B) sequential blocks to the CPU.
   The pinnedtocpu flag on the parallel matrix itself is only meaningful for GPU builds. */
static PetscErrorCode MatPinToCPU_MPIAIJ(Mat A,PetscBool flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
  A->pinnedtocpu = flg;
#endif
  if (a->A) {
    ierr = MatPinToCPU(a->A,flg);CHKERRQ(ierr);
  }
  if (a->B) {
    ierr = MatPinToCPU(a->B,flg);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Propagate block sizes to the two sequential blocks; the off-diagonal block B
   always gets a column block size of 1 since its columns are the compacted garray. */
PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Build an IS of the locally owned rows that contain at least one (structurally present and)
   numerically nonzero entry in either the diagonal or off-diagonal block.
   If no process has an all-zero row, *keptrows is left NULL (collective decision via Allreduce). */
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt = 0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  /* first pass: count the locally zero rows (cnt) */
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  /* second pass: collect the global indices of the nonzero rows */
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na;j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Set (or add to) the diagonal of Y from vector D. When the layouts are congruent and Y is
   assembled the diagonal lives entirely in the local block aij->A, so delegate to it;
   otherwise fall back to the generic element-wise implementation. */
PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*) Y->data;
  PetscBool      cong;

  PetscFunctionBegin;
  ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
  if (Y->assembled && cong) {
    ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
  } else {
    ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Return the global indices of locally owned rows whose diagonal entry is (numerically) zero.
   The search is delegated to the sequential diagonal block, then shifted by the row start. */
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Compute the 1-, 2-, or infinity-norm of every global column. Each process accumulates its
   contributions into a length-N work array (diagonal block columns shifted by cmap->rstart,
   off-diagonal block columns mapped through garray), then a single Allreduce combines them.
   Note: not scalable in memory — the work array has global length N on every process. */
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }

  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

/* Build an IS of locally owned rows holding entries outside the block diagonal:
   the union of the diagonal block's off-block-diagonal rows (sis) and the rows with any
   off-diagonal-block entries (gis), sorted with duplicates removed, shifted to global numbering. */
PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  IS             sis,gis;
  PetscErrorCode ierr;
  const PetscInt *isis,*igis;
  PetscInt       n,*iis,nsis,ngis,rstart,i;

  PetscFunctionBegin;
  ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
  ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
  ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
  ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
  ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);

  ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
  ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
  ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
  n    = ngis + nsis;
  ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<n; i++) iis[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);

  ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
  ierr = ISDestroy(&sis);CHKERRQ(ierr);
  ierr = ISDestroy(&gis);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices

    Used by a preconditioner, hence PETSC_EXTERN
*/
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* gmat is only significant on rank 0; it must be a SeqAIJ matrix there */
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    /* convert per-rank local sizes into an ownership-range prefix sum */
    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine number diagonal and off-diagonal counts */
      ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;   /* ld[i] counts entries strictly left of the diagonal block */
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine number diagonal and off-diagonal counts */
      ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation: dlens temporarily becomes the diagonal-only count */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    /* restore dlens to total row lengths for the insertion loop below */
    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    /* stash ld (entries left of diagonal block per row) for later MAT_REUSE_MATRIX calls */
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0*/
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off diagonal B parts of mat:
       each row's values arrive ordered [left-of-diagonal | diagonal block | right-of-diagonal],
       and ld[] says how many belong to the left part */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                   ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Local utility routine that creates a mapping from the global column
    number to the local number in the off-diagonal part of the local
    storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
    a slightly higher hash table cost; without it it is not scalable (each processor
    has an order N integer array but is fast to access.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  /* the table stores global+1 -> local+1 so that 0 can mean "not present" */
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  /* dense array of global length N; colmap[g] == local+1, 0 means "not present" */
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

/* Insert/add a single value into the diagonal block A. Relies on the caller having in scope:
   rp1/ap1 (row's column-index and value arrays), low1/high1/nrow1/rmax1/lastcol1 (search state),
   aimax/ai/aj/aa/ailen, am, nonew, ignorezeroentries, inserted, t, _i, N, ierr. */
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)       \
{ \
    if (col <= lastcol1)  low1 = 0;     \
    else                 high1 = nrow1; \
    lastcol1 = col;\
    /* binary-search narrowing until the window is small, then linear scan */ \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
      for (_i=low1; _i<high1; _i++) { \
        if (rp1[_i] > col) break; \
        if (rp1[_i] == col) { \
          if (addv == ADD_VALUES) { \
            ap1[_i] += value;   \
            /* Not sure LogFlops will slow down the code or not */ \
            (void)PetscLogFlops(1.0);   \
           } \
          else                    ap1[_i] = value; \
          inserted = PETSC_TRUE; \
          goto a_noinsert; \
        } \
      }  \
      if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
      if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
      if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
      MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
      N = nrow1++ - 1; a->nz++; high1++; \
      /* shift up all the later entries in this row */ \
      ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
      ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
      rp1[_i] = col;  \
      ap1[_i] = value;  \
      A->nonzerostate++;\
      a_noinsert: ; \
      ailen[row] = nrow1; \
}

/* Same as MatSetValues_SeqAIJ_A_Private but for the off-diagonal block B,
   using the *2-suffixed search-state variables and bimax/bi/bj/ba/bilen/bm. */
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol)       \
  { \
    if (col <= lastcol2) low2 = 0;                        \
    else high2 = nrow2;                                   \
    lastcol2 = col;                                       \
    while (high2-low2 > 5) {                              \
      t = (low2+high2)/2;                                 \
      if (rp2[t] > col) high2 = t;                        \
      else             low2  = t;                         \
    }                                                     \
    for (_i=low2; _i<high2; _i++) {                       \
      if (rp2[_i] > col) break;                           \
      if (rp2[_i] == col) {                               \
        if (addv == ADD_VALUES) {                         \
          ap2[_i] += value;                               \
          (void)PetscLogFlops(1.0);                       \
        }                                                 \
        else                    ap2[_i] = value;          \
        inserted = PETSC_TRUE;                            \
        goto b_noinsert;                                  \
      }                                                   \
    }                                                     \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++;                    \
    /* shift up all the later entries in this row */      \
    ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\
    ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\
    rp2[_i] = col;                                        \
    ap2[_i] = value;                                      \
    B->nonzerostate++;                                    \
    b_noinsert: ;                                         \
    bilen[row] = nrow2;                                   \
  }

/* Overwrite an entire locally owned row with the values v[], which must be ordered by
   global column. The row is split as [left-of-diagonal-block (B) | diagonal block (A) |
   right-of-diagonal-block (B)]; l is found by scanning garray for the first column past
   the diagonal range. NOTE(review): only valid for square matrices, per the comment below. */
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}

/* Insert/add an m-by-n logically dense block of values. Locally owned rows go straight into
   the A (diagonal) or B (off-diagonal, via colmap) blocks using the macros above; rows owned
   by other processes are stashed for communication during assembly. */
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value = 0.0;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A     = aij->A;
  Mat_SeqAIJ *a    = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa   = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B     = aij->B;
  Mat_SeqAIJ *b    = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba   = b->a;
  /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
   * cannot use "#if defined" inside a macro. */
  PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;   /* negative row indices are silently ignored */
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: set up per-row search state for both macros */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
          if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              /* new off-diagonal location: revert B to global numbering so it can grow */
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
              inserted = PETSC_FALSE;
            } else if (col < 0) {
              if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
                ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
              } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
            }
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
          if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
        }
      }
    } else {
      /* off-process row: stash for later communication during assembly */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

/*
    This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ*)mat->data;
  Mat        A     = aij->A;   /* diagonal part of the matrix */
  Mat        B     = aij->B;   /* offdiagonal part of the matrix */
  Mat_SeqAIJ *a    = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b    = (Mat_SeqAIJ*)B->data;
  PetscInt   cstart = mat->cmap->rstart,cend = mat->cmap->rend,col;
  PetscInt   *ailen = a->ilen,*aj = a->j;
  PetscInt   *bilen = b->ilen,*bj = b->j;
  PetscInt   am     = aij->A->rmap->n,j;
  PetscInt   diag_so_far = 0,dnz;
  PetscInt   offd_so_far = 0,onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;   /* diagonal block stores local column indices */
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];            /* B still holds global indices before assembly */
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(0);
}

/*
    This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
    Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
    would not be true and the more complex MatSetValues_MPIAIJ has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
{
  Mat_MPIAIJ  *aij   = (Mat_MPIAIJ*)mat->data;
  Mat         A      = aij->A;   /* diagonal part of the matrix */
  Mat         B      = aij->B;   /* offdiagonal part of the matrix */
  Mat_SeqAIJ  *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
  Mat_SeqAIJ  *a     = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ  *b     = (Mat_SeqAIJ*)B->data;
  PetscInt    cstart = mat->cmap->rstart,cend = mat->cmap->rend;
  PetscInt    *ailen = a->ilen,*aj = a->j;
  PetscInt    *bilen = b->ilen,*bj = b->j;
  PetscInt    am     = aij->A->rmap->n,j;
  PetscInt    *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
  PetscInt    col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
  PetscScalar *aa = a->a,*ba = b->a;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz_row = onz_row = 0;
    rowstart_offd = full_offd_i[j];
    rowstart_diag = full_diag_i[j];
    /* Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
        aa[rowstart_diag+dnz_row] = mat_a[col];
        dnz_row++;
      } else { /* off-diagonal entries */
        bj[rowstart_offd+onz_row] = mat_j[col];
        ba[rowstart_offd+onz_row] = mat_a[col];
        onz_row++;
      }
    }
    ailen[j] = dnz_row;
    bilen[j] = onz_row;
  }
  PetscFunctionReturn(0);
}

/* Retrieve individual entries. Only locally owned rows are supported; off-diagonal columns
   are looked up through colmap (built on demand) and return 0.0 when not structurally present. */
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

/* Begin assembly: start scattering the stashed off-process entries to their owners.
   A no-op when stashing is disabled (donotstash / MAT_NO_OFF_PROC_ENTRIES). */
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Finish assembly: drain the stash into local blocks, assemble A and B, handle the
   collective disassemble/reassemble protocol, and set up the multiply machinery
   on first final assembly. */
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
      aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */
#endif
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = 0;

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(0);
}

/* Zero every stored entry; delegates to the diagonal (A) and off-diagonal (B) blocks */
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatZeroRows_MPIAIJ - zeroes the globally-numbered rows in rows[], optionally
   placing diag on the diagonal of each zeroed row, and (when x and b are given)
   fixing the right-hand side b so the solution keeps the values of x in those rows.
   Collective on A.
*/
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ       *mat = (Mat_MPIAIJ *) A->data;
  PetscObjectState sA, sB;
  PetscInt         *lrows;
  PetscInt         r, len;
  PetscBool        cong, lch, gch;
  PetscErrorCode   ierr;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
  ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }

  /* remember the per-block nonzero states so a pattern change can be detected below */
  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    /* congruent layouts: the diagonal entry of a zeroed row lives in the A block */
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   nnwA, nnwB;
    PetscBool  nnzA, nnzB;

    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
      aijA->nonew = 0; /* temporarily allow new nonzeros for the diagonal insertions below */
    }
    if (!nnzB) {
      ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue; /* rows beyond the column range have no diagonal entry */
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    /* restore the original insertion-error behavior */
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* reduce nonzerostate */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(0);
}

/*
   MatZeroRowsColumns_MPIAIJ - zeroes the listed global rows AND the matching
   columns, optionally placing diag on the diagonal and adjusting b.
   Collective on A.
*/
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,r,m,p = 0,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are
owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1; /* -1 marks "local row not zeroed" */
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off diagonal part of matrix */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  /* build a 0/1 mask over columns: 1 where the column is being zeroed */
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  /* scatter the mask so each rank learns which of its ghost columns were zeroed */
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
    if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj]; /* move the eliminated column's contribution into the rhs */
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*
   MatMult_MPIAIJ - yy = A*xx.  Overlaps the ghost-value scatter with the
   diagonal-block multiply: begin scatter, multiply by the local A block,
   end scatter, then add the off-diagonal B block contribution.
*/
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);

  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Multiply only by the local diagonal block: xx = diag-block(A)*bb */
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* zz = yy + A*xx, with the same communication/computation overlap as MatMult_MPIAIJ */
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  /* optionally use the plain-MPI1 scatter variant when it has been set up */
  if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* yy = A^T * xx: local transpose multiplies followed by a reverse-mode additive scatter */
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* add partial results together */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatIsTranspose_MPIAIJ - sets *f to PETSC_TRUE when Bmat equals the transpose
   of Amat to tolerance tol.  First runs the cheap test on the local diagonal
   blocks; only if every rank passes does it extract and compare the
   off-diagonal blocks.  Collective on Amat.
*/
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscBool      lf;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
  ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
  /* notme[] = all global indices NOT owned by this process */
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* A is symmetric iff A equals its own transpose (to tolerance tol) */
PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* zz = yy + A^T * xx; same reverse-scatter pattern as MatMultTranspose_MPIAIJ */
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* add partial results together */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is
the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  /* with congruent layouts, the global diagonal lives entirely in the local A block */
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* A = aa*A; scale both blocks so the global matrix is scaled */
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatDestroy_MPIAIJ - releases all parallel data (sequential blocks, scatter
   context, column map, ghost-column array, row workspace) and detaches every
   method composed on the object.
*/
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  /* detach the composed methods so no stale function pointers survive */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatView_MPIAIJ_Binary - writes the parallel matrix to a binary viewer in the
   PETSc binary format (header, row lengths, column indices, values), with
   process 0 collecting the data via flow-controlled messages.
*/
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ        *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ        *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
  int               fd;
  PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt          nzmax,*column_indices,j,k,col,*garray =
aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1312 PetscScalar *column_values; 1313 PetscInt message_count,flowcontrolcount; 1314 FILE *file; 1315 1316 PetscFunctionBegin; 1317 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1318 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1319 nz = A->nz + B->nz; 1320 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1321 if (!rank) { 1322 header[0] = MAT_FILE_CLASSID; 1323 header[1] = mat->rmap->N; 1324 header[2] = mat->cmap->N; 1325 1326 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1327 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1328 /* get largest number of rows any processor has */ 1329 rlen = mat->rmap->n; 1330 range = mat->rmap->range; 1331 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1332 } else { 1333 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1334 rlen = mat->rmap->n; 1335 } 1336 1337 /* load up the local row counts */ 1338 ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr); 1339 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1340 1341 /* store the row lengths to the file */ 1342 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1343 if (!rank) { 1344 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1345 for (i=1; i<size; i++) { 1346 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1347 rlen = range[i+1] - range[i]; 1348 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1349 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1350 } 1351 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1352 } else 
{ 1353 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1354 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1355 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1356 } 1357 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1358 1359 /* load up the local column indices */ 1360 nzmax = nz; /* th processor needs space a largest processor needs */ 1361 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1362 ierr = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr); 1363 cnt = 0; 1364 for (i=0; i<mat->rmap->n; i++) { 1365 for (j=B->i[i]; j<B->i[i+1]; j++) { 1366 if ((col = garray[B->j[j]]) > cstart) break; 1367 column_indices[cnt++] = col; 1368 } 1369 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1370 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1371 } 1372 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1373 1374 /* store the column indices to the file */ 1375 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1376 if (!rank) { 1377 MPI_Status status; 1378 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1379 for (i=1; i<size; i++) { 1380 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1381 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1382 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1383 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1384 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1385 } 1386 ierr = 
PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1387 } else { 1388 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1389 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1390 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1391 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1392 } 1393 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1394 1395 /* load up the local column values */ 1396 ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr); 1397 cnt = 0; 1398 for (i=0; i<mat->rmap->n; i++) { 1399 for (j=B->i[i]; j<B->i[i+1]; j++) { 1400 if (garray[B->j[j]] > cstart) break; 1401 column_values[cnt++] = B->a[j]; 1402 } 1403 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1404 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1405 } 1406 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1407 1408 /* store the column values to the file */ 1409 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1410 if (!rank) { 1411 MPI_Status status; 1412 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1413 for (i=1; i<size; i++) { 1414 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1415 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1416 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1417 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1418 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1419 } 1420 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1421 
} else { 1422 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1423 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1424 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1425 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1426 } 1427 ierr = PetscFree(column_values);CHKERRQ(ierr); 1428 1429 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1430 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1431 PetscFunctionReturn(0); 1432 } 1433 1434 #include <petscdraw.h> 1435 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1436 { 1437 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1438 PetscErrorCode ierr; 1439 PetscMPIInt rank = aij->rank,size = aij->size; 1440 PetscBool isdraw,iascii,isbinary; 1441 PetscViewer sviewer; 1442 PetscViewerFormat format; 1443 1444 PetscFunctionBegin; 1445 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1446 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1447 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1448 if (iascii) { 1449 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1450 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1451 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1452 ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr); 1453 ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1454 for (i=0; i<(PetscInt)size; i++) { 1455 nmax = PetscMax(nmax,nz[i]); 1456 nmin = PetscMin(nmin,nz[i]); 1457 navg += nz[i]; 1458 } 1459 ierr = PetscFree(nz);CHKERRQ(ierr); 1460 navg = navg/size; 1461 ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - 
Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank synchronized report of local sizes, nonzeros and inode usage */
      MatInfo   info;
      PetscBool inodes;

      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      /* nothing to print for factored-matrix info on this type */
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (iascii && size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow,iscol;

    /* rank 0 requests all rows/columns; everyone else requests none */
    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
    ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
    ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
    /* The commented code uses MatCreateSubMatrices instead */
    /*
      Mat *AA, A = NULL, Av;
      IS  isrow,iscol;

      ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
      ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ?
mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
      ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
      if (!rank) {
        ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
        A  = AA[0];
        Av = AA[0];
      }
      ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
    */
    ierr = ISDestroy(&iscol);CHKERRQ(ierr);
    ierr = ISDestroy(&isrow);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      if (((PetscObject)mat)->name) {
        ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
      }
      ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* MatView entry point for MPIAIJ: dispatches every supported viewer type to the common handler */
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ
*mat = (Mat_MPIAIJ*)matin->data; 1583 PetscErrorCode ierr; 1584 Vec bb1 = 0; 1585 PetscBool hasop; 1586 1587 PetscFunctionBegin; 1588 if (flag == SOR_APPLY_UPPER) { 1589 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1590 PetscFunctionReturn(0); 1591 } 1592 1593 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1594 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1595 } 1596 1597 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1598 if (flag & SOR_ZERO_INITIAL_GUESS) { 1599 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1600 its--; 1601 } 1602 1603 while (its--) { 1604 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1605 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1606 1607 /* update rhs: bb1 = bb - B*x */ 1608 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1609 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1610 1611 /* local sweep */ 1612 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1613 } 1614 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1615 if (flag & SOR_ZERO_INITIAL_GUESS) { 1616 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1617 its--; 1618 } 1619 while (its--) { 1620 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1621 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1622 1623 /* update rhs: bb1 = bb - B*x */ 1624 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1625 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1626 1627 /* local sweep */ 1628 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1629 } 1630 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1631 if (flag & SOR_ZERO_INITIAL_GUESS) { 1632 
ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1633 its--; 1634 } 1635 while (its--) { 1636 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1637 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1638 1639 /* update rhs: bb1 = bb - B*x */ 1640 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1641 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1642 1643 /* local sweep */ 1644 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1645 } 1646 } else if (flag & SOR_EISENSTAT) { 1647 Vec xx1; 1648 1649 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1650 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1651 1652 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1653 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1654 if (!mat->diag) { 1655 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1656 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1657 } 1658 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1659 if (hasop) { 1660 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1661 } else { 1662 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1663 } 1664 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1665 1666 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1667 1668 /* local sweep */ 1669 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1670 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1671 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1672 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1673 1674 ierr = 
VecDestroy(&bb1);CHKERRQ(ierr); 1675 1676 matin->factorerrortype = mat->A->factorerrortype; 1677 PetscFunctionReturn(0); 1678 } 1679 1680 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1681 { 1682 Mat aA,aB,Aperm; 1683 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1684 PetscScalar *aa,*ba; 1685 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1686 PetscSF rowsf,sf; 1687 IS parcolp = NULL; 1688 PetscBool done; 1689 PetscErrorCode ierr; 1690 1691 PetscFunctionBegin; 1692 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1693 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1694 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1695 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1696 1697 /* Invert row permutation to find out where my rows should go */ 1698 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1699 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1700 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1701 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1702 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1703 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1704 1705 /* Invert column permutation to find out where my columns should go */ 1706 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1707 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1708 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1709 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1710 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1711 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1712 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1713 1714 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1715 ierr = 
ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1716 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1717 1718 /* Find out where my gcols should go */ 1719 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1720 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1721 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1722 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1723 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1724 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1725 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1726 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1727 1728 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1729 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1730 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1731 for (i=0; i<m; i++) { 1732 PetscInt row = rdest[i],rowner; 1733 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1734 for (j=ai[i]; j<ai[i+1]; j++) { 1735 PetscInt cowner,col = cdest[aj[j]]; 1736 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1737 if (rowner == cowner) dnnz[i]++; 1738 else onnz[i]++; 1739 } 1740 for (j=bi[i]; j<bi[i+1]; j++) { 1741 PetscInt cowner,col = gcdest[bj[j]]; 1742 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1743 if (rowner == cowner) dnnz[i]++; 1744 else onnz[i]++; 1745 } 1746 } 1747 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1748 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1749 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1750 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1751 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1752 1753 ierr = 
MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1754 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1755 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1756 for (i=0; i<m; i++) { 1757 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1758 PetscInt j0,rowlen; 1759 rowlen = ai[i+1] - ai[i]; 1760 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1761 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1762 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1763 } 1764 rowlen = bi[i+1] - bi[i]; 1765 for (j0=j=0; j<rowlen; j0=j) { 1766 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1767 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1768 } 1769 } 1770 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1771 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1772 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1773 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1774 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1775 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1776 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1777 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1778 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1779 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1780 *B = Aperm; 1781 PetscFunctionReturn(0); 1782 } 1783 1784 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1785 { 1786 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1787 PetscErrorCode ierr; 1788 1789 PetscFunctionBegin; 1790 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1791 if (ghosts) *ghosts = aij->garray; 1792 PetscFunctionReturn(0); 1793 } 1794 1795 
/*
   MatGetInfo_MPIAIJ - Gathers matrix statistics (nonzeros, memory, mallocs) by
   summing the local A (diagonal) and B (off-diagonal) blocks, then reducing
   across the communicator with MAX or SUM as requested by flag.
*/
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscLogDouble isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;

  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);

  /* accumulate A and B contributions before any reduction */
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

/*
   MatSetOption_MPIAIJ - Applies a MatOption to the parallel matrix, forwarding
   to both sequential blocks where relevant; unknown options raise PETSC_ERR_SUP.
*/
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  /* options that only make sense once the nonzero structure exists: forward to both blocks */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;

    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_SORTED_FULL:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
  case MAT_SPD:
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    break;
  case MAT_SUBMAT_SINGLEIS:
    A->submat_singleis = flg;
    break;
  case MAT_STRUCTURE_ONLY:
    /* The option is handled directly by MatSetOption() */
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

/*
   MatGetRow_MPIAIJ - Returns one locally-owned row (global index `row`) of the
   parallel matrix, merging the diagonal-block and off-diagonal-block entries
   into a single list sorted by increasing global column.  The returned idx/v
   arrays point into per-matrix scratch buffers, so only one row may be
   outstanding at a time (guarded by getrowactive); release with MatRestoreRow.
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap->n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  /* only request from the blocks what the caller actually asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;  /* number of B entries whose global column precedes the diagonal block */
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          /* imark not yet computed (v was NULL): recompute the split point here */
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatRestoreRow_MPIAIJ - Releases the row obtained with MatGetRow_MPIAIJ by
   clearing the getrowactive flag (the scratch buffers are retained for reuse).
*/
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/*
   MatNorm_MPIAIJ - Computes the Frobenius, 1- (max column sum), or infinity-
   (max row sum) norm of the parallel matrix; the 2-norm is not supported.
   On a single process it delegates to the sequential MatNorm.
*/
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = mat->cmap->rstart;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over both blocks, reduce, then take the square root */
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *norm = PetscSqrtReal(*norm);
      ierr  = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
    } else if (type == NORM_1) { /* max column norm */
      /* NOTE(review): allocates O(global columns) scratch on every rank — fine for
         modest N, not scalable for very wide matrices */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
      ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
      *norm = 0.0;
      v     = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap->n; j++) {
        v   = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}

/*
   MatTranspose_MPIAIJ - Forms the transpose of a parallel AIJ matrix.  The
   diagonal block is transposed locally (fast path, no MatSetValues); the
   off-diagonal block is inserted column-by-column with MatSetValues.  For
   MAT_INITIAL_MATRIX (or in-place reuse) the result's preallocation is first
   computed with a PetscSF reduction of the off-diagonal column counts.
*/
PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIAIJ      *a =(Mat_MPIAIJ*)A->data,*b;
  Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
  PetscInt        M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
  const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
  PetscErrorCode  ierr;
  Mat             B,A_diag,*B_diag;
  const MatScalar *array;

  PetscFunctionBegin;
  ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt             *d_nnz,*g_nnz,*o_nnz;
    PetscSFNode          *oloc;
    PETSC_UNUSED PetscSF sf;

    ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
    /* compute d_nnz for preallocation */
    ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++) {
      d_nnz[aj[i]]++;
    }
    /* compute local off-diagonal contributions */
    ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr);
    for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
    ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
    ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
    ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr);
    ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

    /* note the swapped row/column sizes and block sizes: B has A's shape transposed */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
    ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
  } else {
    B    = *matout;
    ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  }

  b           = (Mat_MPIAIJ*)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i=0; i<A_diag_ncol; i++) {
    B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
  }

  /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
     very quickly (=without using MatSetValues), because all writes are local. */
  ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);

  /* copy over the B part */
  ierr  = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr);
  array = Bloc->a;
  row   = A->rmap->rstart;
  for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i=0; i<mb; i++) {
    ncol = bi[i+1]-bi[i];
    /* each source row becomes one column of B: insert ncol x 1 */
    ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++;
    array += ncol; cols_tmp += ncol;
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in-place: steal B's guts into A */
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*
   MatDiagonalScale_MPIAIJ - Computes mat = diag(ll)*mat*diag(rr).  The right
   scaling of the off-diagonal block needs rr's ghost values, so the forward
   scatter is started first and overlapped with the left scaling and the
   diagonal-block scaling.
*/
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            a = aij->A,b = aij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*
   MatSetUnfactored_MPIAIJ - Marks the diagonal block as unfactored (the
   off-diagonal block is never factored in place, so only a->A is reset).
*/
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatEqual_MPIAIJ - Compares the local A and B blocks of the two matrices and
   reduces the per-rank result with a logical AND over the communicator.
*/
PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat            a,b,c,d;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatCopy_MPIAIJ - Copies A into B.  The blockwise fast path is only valid
   when the nonzero patterns match AND both matrices use this same copy
   implementation; otherwise it falls back to the generic MatCopy_Basic.
*/
PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       the MatCopy() directly on the two parts. If need be, we can provide a more
       efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
       then copying the submatrices */
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatSetUp_MPIAIJ - Default setup: preallocate with default sizing so the
   matrix is ready for MatSetValues.
*/
PetscErrorCode MatSetUp_MPIAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Computes the number of nonzeros per row needed for preallocation when X and Y
   have different nonzero structure.
*/
/*
   MatAXPYGetPreallocation_MPIX_private - For each of the m rows, counts the
   size of the union of X's and Y's column sets (columns compared in the global
   numbering via xltog/yltog), assuming each row's columns are sorted ascending.
   The result goes into nnz[] for preallocating Y+X.
*/
PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
{
  PetscInt i,j,k,nzx,nzy;

  PetscFunctionBegin;
  /* Set the number of nonzeros in the new matrix */
  for (i=0; i<m; i++) {
    const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
    nzx = xi[i+1] - xi[i];
    nzy = yi[i+1] - yi[i];
    nnz[i] = 0;
    /* merge the two sorted column lists, counting each distinct column once */
    for (j=0,k=0; j<nzx; j++) { /* Point in X */
      for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
      if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
      nnz[i]++;
    }
    for (; k<nzy; k++) nnz[i]++; /* remaining Y-only columns */
  }
  PetscFunctionReturn(0);
}

/* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
{
  PetscErrorCode ierr;
  PetscInt       m = Y->rmap->N;   /* Y is a sequential block here, so N == local rows */
  Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
  Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;

  PetscFunctionBegin;
  ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatAXPY_MPIAIJ - Computes Y = a*X + Y.  With SAME_NONZERO_PATTERN the value
   arrays are combined directly with BLAS axpy; with SUBSET_NONZERO_PATTERN it
   defers to MatAXPY_Basic; otherwise a new matrix with the union pattern is
   preallocated, filled, and swapped into Y via MatHeaderReplace.
*/
PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqAIJ     *x,*y;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    /* diagonal blocks share a pattern: axpy directly on the value arrays */
    x    = (Mat_SeqAIJ*)xx->A->data;
    ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
    y    = (Mat_SeqAIJ*)yy->A->data;
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    /* likewise for the off-diagonal blocks */
    x    = (Mat_SeqAIJ*)xx->B->data;
    y    = (Mat_SeqAIJ*)yy->B->data;
    ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
    /* the MatAXPY_Basic* subroutines calls MatAssembly, so the matrix on the GPU
       will be updated */
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    if (Y->offloadmask != PETSC_OFFLOAD_UNALLOCATED) {
      Y->offloadmask = PETSC_OFFLOAD_CPU;
    }
#endif
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
  } else {
    Mat      B;
    PetscInt *nnz_d,*nnz_o;
    ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
    ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
    ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
    ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
    ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
    ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
    ierr = PetscFree(nnz_d);CHKERRQ(ierr);
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatConjugate_SeqAIJ(Mat);

/*
   MatConjugate_MPIAIJ - Complex-conjugates all entries (no-op for real scalars).
*/
PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
{
#if defined(PETSC_USE_COMPLEX)
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
  ierr =
MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2327 #else 2328 PetscFunctionBegin; 2329 #endif 2330 PetscFunctionReturn(0); 2331 } 2332 2333 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2334 { 2335 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2336 PetscErrorCode ierr; 2337 2338 PetscFunctionBegin; 2339 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2340 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2341 PetscFunctionReturn(0); 2342 } 2343 2344 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2345 { 2346 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2347 PetscErrorCode ierr; 2348 2349 PetscFunctionBegin; 2350 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2351 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2352 PetscFunctionReturn(0); 2353 } 2354 2355 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2356 { 2357 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2358 PetscErrorCode ierr; 2359 PetscInt i,*idxb = 0; 2360 PetscScalar *va,*vb; 2361 Vec vtmp; 2362 2363 PetscFunctionBegin; 2364 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2365 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2366 if (idx) { 2367 for (i=0; i<A->rmap->n; i++) { 2368 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2369 } 2370 } 2371 2372 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2373 if (idx) { 2374 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2375 } 2376 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2377 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2378 2379 for (i=0; i<A->rmap->n; i++) { 2380 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2381 va[i] = vb[i]; 2382 if (idx) idx[i] = a->garray[idxb[i]]; 2383 } 2384 } 2385 2386 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2387 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2388 ierr = PetscFree(idxb);CHKERRQ(ierr); 2389 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2390 PetscFunctionReturn(0); 2391 } 2392 2393 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2394 { 2395 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2396 
PetscErrorCode ierr; 2397 PetscInt i,*idxb = 0; 2398 PetscScalar *va,*vb; 2399 Vec vtmp; 2400 2401 PetscFunctionBegin; 2402 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2403 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2404 if (idx) { 2405 for (i=0; i<A->cmap->n; i++) { 2406 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2407 } 2408 } 2409 2410 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2411 if (idx) { 2412 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2413 } 2414 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2415 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2416 2417 for (i=0; i<A->rmap->n; i++) { 2418 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2419 va[i] = vb[i]; 2420 if (idx) idx[i] = a->garray[idxb[i]]; 2421 } 2422 } 2423 2424 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2425 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2426 ierr = PetscFree(idxb);CHKERRQ(ierr); 2427 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2428 PetscFunctionReturn(0); 2429 } 2430 2431 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2432 { 2433 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2434 PetscInt n = A->rmap->n; 2435 PetscInt cstart = A->cmap->rstart; 2436 PetscInt *cmap = mat->garray; 2437 PetscInt *diagIdx, *offdiagIdx; 2438 Vec diagV, offdiagV; 2439 PetscScalar *a, *diagA, *offdiagA; 2440 PetscInt r; 2441 PetscErrorCode ierr; 2442 2443 PetscFunctionBegin; 2444 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2445 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2446 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 2447 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2448 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2449 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2450 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2451 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2452 for (r = 0; r < n; ++r) { 2453 if 
(PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2454 a[r] = diagA[r]; 2455 idx[r] = cstart + diagIdx[r]; 2456 } else { 2457 a[r] = offdiagA[r]; 2458 idx[r] = cmap[offdiagIdx[r]]; 2459 } 2460 } 2461 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2462 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2463 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2464 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2465 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2466 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2467 PetscFunctionReturn(0); 2468 } 2469 2470 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2471 { 2472 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2473 PetscInt n = A->rmap->n; 2474 PetscInt cstart = A->cmap->rstart; 2475 PetscInt *cmap = mat->garray; 2476 PetscInt *diagIdx, *offdiagIdx; 2477 Vec diagV, offdiagV; 2478 PetscScalar *a, *diagA, *offdiagA; 2479 PetscInt r; 2480 PetscErrorCode ierr; 2481 2482 PetscFunctionBegin; 2483 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2484 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2485 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2486 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2487 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2488 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2489 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2490 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2491 for (r = 0; r < n; ++r) { 2492 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2493 a[r] = diagA[r]; 2494 idx[r] = cstart + diagIdx[r]; 2495 } else { 2496 a[r] = offdiagA[r]; 2497 idx[r] = cmap[offdiagIdx[r]]; 2498 } 2499 } 2500 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2501 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2502 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2503 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2504 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2505 ierr = 
PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Gather the (sequential) nonzero structure of the entire parallel matrix onto each process */
PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat            *dummy;

  PetscFunctionBegin;
  ierr = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
  *newmat = *dummy;
  ierr = PetscFree(dummy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Block-diagonal inversion is purely local, so delegate to the diagonal (sequential) block */
PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
  /* propagate any factorization error detected in the local block */
  A->factorerrortype = a->A->factorerrortype;
  PetscFunctionReturn(0);
}

/* Fill both local blocks with random values and reassemble */
static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;

  PetscFunctionBegin;
  if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
  ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
  if (x->assembled) {
    ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
  } else {
    /* not yet assembled: skip the diagonal-owned column range when randomizing B */
    ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Select the scalable or non-scalable MatIncreaseOverlap implementation */
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
{
  PetscFunctionBegin;
  if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
  else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
  PetscFunctionReturn(0);
}

/*@
   MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap

   Collective on Mat

   Input Parameters:
+  A - the matrix
-  sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)

   Level: advanced

@*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Process MATMPIAIJ-specific options from the options database */
PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
{
  PetscErrorCode ierr;
  PetscBool      sc = PETSC_FALSE,flg;

  PetscFunctionBegin;
  ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
  /* reflect the currently installed overlap algorithm as the default */
  if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
  ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
  }
  ierr = PetscOptionsTail();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Y = Y + a*I; makes sure the diagonal of the local block is preallocated first */
PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
  Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
  } else if (!aij->nz) {
    /* diagonal block has no nonzeros yet: preallocate one entry per row for the shift,
       preserving the caller's nonew option */
    PetscInt nonew = aij->nonew;
    ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
    aij->nonew = nonew;
  }
  ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Report whether any locally owned row is missing its diagonal entry; d (optional)
   receives the global index of the first such row */
PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

PetscFunctionBegin;
  if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
  ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
  if (d) {
    PetscInt rstart;
    ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
    /* convert the local row index reported by the diagonal block to a global index */
    *d += rstart;

  }
  PetscFunctionReturn(0);
}

/* Variable-block diagonal inversion is purely local, so delegate to the diagonal block */
PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
/* Function dispatch table for MATMPIAIJ; slot numbers are marked inline.
   Zero entries mean the operation is not supported by this matrix type. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                /*10*/ 0,
                                       0,
                                       0,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                /*24*/ MatZeroRows_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*29*/ MatSetUp_MPIAIJ,
                                       0,
                                       0,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       0,
                                /*34*/ MatDuplicate_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                /*49*/ MatSetRandom_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       0,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       0,
                                /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       0,
                                       MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
                                /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       0,
                                       0,
                                       MatFindZeroDiagonals_MPIAIJ,
                                /*80*/ 0,
                                       0,
                                       0,
                                /*83*/ MatLoad_MPIAIJ,
                                       MatIsSymmetric_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
                                       MatMatMultSymbolic_MPIAIJ_MPIAIJ,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       MatPtAP_MPIAIJ_MPIAIJ,
                                       MatPtAPSymbolic_MPIAIJ_MPIAIJ,
                                /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       MatPinToCPU_MPIAIJ,
                                /*99*/ 0,
                                       0,
                                       0,
                                       MatConjugate_MPIAIJ,
                                       0,
                                /*104*/MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       0,
                                       0,
                                /*109*/0,
                                       0,
                                       MatGetRowMin_MPIAIJ,
                                       0,
                                       MatMissingDiagonal_MPIAIJ,
                                /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
                                       0,
                                       MatGetGhosts_MPIAIJ,
                                       0,
                                       0,
                                /*119*/0,
                                       0,
                                       0,
                                       0,
                                       MatGetMultiProcBlock_MPIAIJ,
                                /*124*/MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnNorms_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                /*129*/0,
                                       MatTransposeMatMult_MPIAIJ_MPIAIJ,
                                       MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       0,
                                /*134*/0,
                                       0,
                                       MatRARt_MPIAIJ_MPIAIJ,
                                       0,
                                       0,
                                /*139*/MatSetBlockSizes_MPIAIJ,
                                       0,
                                       0,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
};

/*
----------------------------------------------------------------------------------------*/

/* Save the current numerical values of both local blocks so they can be restored later */
PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Restore the numerical values saved by MatStoreValues_MPIAIJ() */
PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Preallocate the diagonal (A) and off-diagonal (B) sequential blocks; d_nz/d_nnz
   and o_nz/o_nnz follow the MatMPIAIJSetPreallocation() conventions.  Any existing
   column map, gather array and communication scatter are discarded since the
   nonzero pattern may change.
*/
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  b = (Mat_MPIAIJ*)B->data;

#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(b->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(b->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);

  /* Because the B will have been resized we simply destroy it and create a new one each time */
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
  ierr = MatDestroy(&b->B);CHKERRQ(ierr);
  ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
  /* uniprocessor case: B has 0 columns since everything lives in the diagonal block */
  ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
  ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);

  if (!B->preallocated) {
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
    ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
  }

  ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
  B->preallocated = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* Reset the preallocation of both local blocks, discarding communication structures */
PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  b = (Mat_MPIAIJ*)B->data;

#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(b->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(b->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);

  ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
  ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
  B->preallocated = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* Duplicate a MATMPIAIJ matrix: layouts are referenced, blocks/maps/scatters are copied */
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode
ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  a = (Mat_MPIAIJ*)mat->data;

  mat->factortype = matin->factortype;
  mat->assembled = PETSC_TRUE;
  mat->insertmode = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  a->size = oldmat->size;
  a->rank = oldmat->rank;
  a->donotstash = oldmat->donotstash;
  a->roworiented = oldmat->roworiented;
  /* per-instance MatGetRow() scratch state starts empty in the copy */
  a->rowindices = 0;
  a->rowvalues = 0;
  a->getrowactive = PETSC_FALSE;

  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  if (oldmat->garray) {
    PetscInt len;
    len = oldmat->B->cmap->n;
    ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); }
  } else a->garray = 0;

  /* It may happen MatDuplicate is called with a non-assembled matrix
    In fact, MatDuplicate only requires the matrix to be preallocated
    This may happen inside a DMCreateMatrix_Shell */
  if (oldmat->lvec) {
    ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
  }
  if (oldmat->Mvctx) {
    ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
  }
  if (oldmat->Mvctx_mpi1) {
    ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
  }

  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

/* Dispatch MatLoad to the binary or HDF5 reader based on the viewer type */
PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
{
  PetscBool      isbinary, ishdf5;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr);
  if (isbinary) {
    ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
  } else if (ishdf5) {
#if defined(PETSC_HAVE_HDF5)
    ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
#else
    SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
#endif
  } else {
    SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
  }
  PetscFunctionReturn(0);
}

/* Read a MATMPIAIJ matrix from a PETSc binary viewer: rank 0 reads the header, row
   lengths, column indices and values, and ships each process its piece with MPIULong_Send() */
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat newMat, PetscViewer viewer)
{
  PetscScalar    *vals,*svals;
  MPI_Comm       comm;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
  PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
  PetscInt       cend,cstart,n,*rowners;
  int            fd;
  PetscInt       bs = newMat->rmap->bs;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  if (!rank) {
    /* header = [classid, rows, cols, nonzeros-or-format-flag] */
    ierr = PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
    if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ");
  }

  ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  if (bs < 0) bs = 1;

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];

  /* If global sizes are set, check if they are consistent with that given in the file */
  if (newMat->rmap->N >= 0 && newMat->rmap->N != M)
SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
  if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);

  /* determine ownership of all (block) rows */
  if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
  if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */
  else m = newMat->rmap->n; /* Set by user */

  ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

  /* First process needs enough room for process with most rows */
  if (!rank) {
    mmax = rowners[1];
    for (i=2; i<=size; i++) {
      mmax = PetscMax(mmax, rowners[i]);
    }
  } else mmax = -1; /* unused, but compilers complain */

  /* turn per-process counts into an ownership-range prefix sum */
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryRead(fd,ourlens,m,NULL,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
    ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
    for (j=0; j<m; j++) {
      procsnz[0] += ourlens[j];
    }
    for (i=1; i<size; i++) {
      ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],NULL,PETSC_INT);CHKERRQ(ierr);
      /* calculate the number of nonzeros on each processor */
      for (j=0; j<rowners[i+1]-rowners[i]; j++) {
        procsnz[i] += rowlengths[j];
      }
      ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);
  } else {
    ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz = procsnz[0];
    ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,NULL,PETSC_INT);CHKERRQ(ierr);

    /* read in every one elses and ship off */
    for (i=1; i<size; i++) {
      nz = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
      ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);

    /* receive message of column indices*/
    ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
    else n = newMat->cmap->n;
    ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend = rend;
    n = cend - cstart;
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscArrayzero(offlens,m);CHKERRQ(ierr);
  jj = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  /* temporarily shrink ourlens to the diagonal-block counts for preallocation */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);

  if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}

  ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);

  /* restore full row lengths for the insertion loops below */
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj = rstart;
    smycols = mycols;
    svals = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);

    /* receive message of values*/
    ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);

    /* insert into matrix */
    jj = rstart;
    smycols = mycols;
    svals = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Not scalable because of
ISAllGather() unless getting all columns. */
PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
{
  PetscErrorCode ierr;
  IS             iscol_local;
  PetscBool      isstride;
  PetscMPIInt    lisstride=0,gisstride;

  PetscFunctionBegin;
  /* check if we are grabbing all columns*/
  ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);

  if (isstride) {
    PetscInt start,len,mstart,mlen;
    ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
    ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
    ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
    /* local flag: this rank's piece of iscol is exactly its owned column range */
    if (mstart == start && mlen-mstart == len) lisstride = 1;
  }

  /* all ranks must agree before the ISAllGather() can be skipped */
  ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  if (gisstride) {
    PetscInt N;
    ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
    ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
    ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
  } else {
    PetscInt cbs;
    ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
    ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
    ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
  }

  *isseq = iscol_local;
  PetscFunctionReturn(0);
}

/*
 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
 (see MatCreateSubMatrix_MPIAIJ_nonscalable)

 Input Parameters:
   mat - matrix
   isrow - parallel row index set; its local indices are a subset of local columns of mat,
           i.e., mat->rstart <= isrow[i] < mat->rend
   iscol - parallel column index set; its local indices are a subset of local columns of mat,
           i.e., mat->cstart <= iscol[i] < mat->cend
 Output Parameter:
   isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
   iscol_o - sequential column index set for retrieving mat->B
   garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
*/
PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
{
  PetscErrorCode ierr;
  Vec            x,cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray,*cmaparray;
  PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
  Mat            B=a->B;
  Vec            lvec=a->lvec,lcmap;
  PetscInt       i,cstart,cend,Bn=B->cmap->N;
  MPI_Comm       comm;
  VecScatter     Mvctx=a->Mvctx;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
  ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
  ierr = VecSet(x,-1.0);CHKERRQ(ierr);
  ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
  ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);

  /* Get start indices */
  ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
  isstart -= ncols;
  ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);

  ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
  ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
  ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
  ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
  for (i=0; i<ncols; i++) {
    xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */
    idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */
  }
  ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
  ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);

  /* Get iscol_d */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
  ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
  ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);

  /* Get isrow_d */
  ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
  rstart = mat->rmap->rstart;
  ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
  for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
  ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);

  ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
  ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
  ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);

  ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices */
  count = 0;
  ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
  ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);

  ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
  ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
  for (i=0; i<Bn; i++) {
    /* entries that kept the -1 padding were not selected by iscol */
    if (PetscRealPart(xarray[i]) > -1.0) {
      idx[count] = i; /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
      count++;
    }
  }
  ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);

  ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
  /* cannot ensure iscol_o has same blocksize as iscol! */

  ierr = PetscFree(idx);CHKERRQ(ierr);
  /* ownership of cmap1 transfers to the caller via *garray */
  *garray = cmap1;

  ierr = VecDestroy(&x);CHKERRQ(ierr);
  ierr = VecDestroy(&cmap);CHKERRQ(ierr);
  ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
  Mat            M = NULL;
  MPI_Comm       comm;
  IS             iscol_d,isrow_d,iscol_o;
  Mat            Asub = NULL,Bsub = NULL;
  PetscInt       n;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat */
    ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
    if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");

    ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
    if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");

    ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
    if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");

    /* Update diagonal and off-diagonal portions of submat */
    asub = (Mat_MPIAIJ*)(*submat)->data;
    ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
    ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
    if (n) {
      ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  } else { /* call == MAT_INITIAL_MATRIX) */
    const PetscInt *garray;
    PetscInt       BsubN;

    /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
    ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);

    /* Create local submatrices Asub and Bsub */
    ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
    ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);

    /* Create submatrix M */
    ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);

    /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
    asub = (Mat_MPIAIJ*)M->data;

    ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
    n = asub->B->cmap->N;
    if (BsubN > n) {
      /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
      const PetscInt *idx;
      PetscInt       i,j,*idx_new,*subgarray = asub->garray;
      ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);

      ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
      j = 0;
      ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
      for (i=0; i<n; i++) {
        if (j >= BsubN) break;
        /* advance j until garray[j] catches up with subgarray[i]; both are sorted gather arrays */
        while (subgarray[i] > garray[j]) j++;

        if (subgarray[i] == garray[j]) {
          idx_new[i] = idx[j++];
        } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]);
      }
      ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);

      ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
      ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);

    } else if (BsubN < n) {
      /* NOTE(review): this format string has no %D placeholders for the two arguments BsubN and n */
      SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N);
    }

    ierr = PetscFree(garray);CHKERRQ(ierr);
    *submat = M;

    /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
    ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
    ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);

    ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);

    ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  IS             iscol_local=NULL,isrow_d;
  PetscInt       csize;
  PetscInt       n,i,j,start,end;
  PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
  MPI_Comm       comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
    if (isrow_d) {
      sameRowDist = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
      if
(iscol_local) { 3450 sameRowDist = PETSC_TRUE; 3451 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3452 } 3453 } 3454 } else { 3455 /* Check if isrow has same processor distribution as mat */ 3456 sameDist[0] = PETSC_FALSE; 3457 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3458 if (!n) { 3459 sameDist[0] = PETSC_TRUE; 3460 } else { 3461 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3462 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3463 if (i >= start && j < end) { 3464 sameDist[0] = PETSC_TRUE; 3465 } 3466 } 3467 3468 /* Check if iscol has same processor distribution as mat */ 3469 sameDist[1] = PETSC_FALSE; 3470 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3471 if (!n) { 3472 sameDist[1] = PETSC_TRUE; 3473 } else { 3474 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3475 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3476 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3477 } 3478 3479 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3480 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3481 sameRowDist = tsameDist[0]; 3482 } 3483 3484 if (sameRowDist) { 3485 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3486 /* isrow and iscol have same processor distribution as mat */ 3487 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3488 PetscFunctionReturn(0); 3489 } else { /* sameRowDist */ 3490 /* isrow has same processor distribution as mat */ 3491 if (call == MAT_INITIAL_MATRIX) { 3492 PetscBool sorted; 3493 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3494 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3495 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3496 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3497 3498 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3499 if (sorted) { 3500 /* 
MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3501 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3502 PetscFunctionReturn(0); 3503 } 3504 } else { /* call == MAT_REUSE_MATRIX */ 3505 IS iscol_sub; 3506 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3507 if (iscol_sub) { 3508 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3509 PetscFunctionReturn(0); 3510 } 3511 } 3512 } 3513 } 3514 3515 /* General case: iscol -> iscol_local which has global size of iscol */ 3516 if (call == MAT_REUSE_MATRIX) { 3517 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3518 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3519 } else { 3520 if (!iscol_local) { 3521 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3522 } 3523 } 3524 3525 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3526 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3527 3528 if (call == MAT_INITIAL_MATRIX) { 3529 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3530 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3531 } 3532 PetscFunctionReturn(0); 3533 } 3534 3535 /*@C 3536 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3537 and "off-diagonal" part of the matrix in CSR format. 3538 3539 Collective 3540 3541 Input Parameters: 3542 + comm - MPI communicator 3543 . A - "diagonal" portion of matrix 3544 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3545 - garray - global index of B columns 3546 3547 Output Parameter: 3548 . 
mat - the matrix, with input A as its local diagonal matrix 3549 Level: advanced 3550 3551 Notes: 3552 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3553 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3554 3555 .seealso: MatCreateMPIAIJWithSplitArrays() 3556 @*/ 3557 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3558 { 3559 PetscErrorCode ierr; 3560 Mat_MPIAIJ *maij; 3561 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3562 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3563 PetscScalar *oa=b->a; 3564 Mat Bnew; 3565 PetscInt m,n,N; 3566 3567 PetscFunctionBegin; 3568 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3569 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3570 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3571 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3572 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3573 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3574 3575 /* Get global columns of mat */ 3576 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3577 3578 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3579 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3580 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3581 maij = (Mat_MPIAIJ*)(*mat)->data; 3582 3583 (*mat)->preallocated = PETSC_TRUE; 3584 3585 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3586 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3587 3588 /* Set A as diagonal portion of *mat */ 3589 maij->A = A; 3590 3591 nz = oi[m]; 3592 for (i=0; i<nz; i++) { 3593 col = oj[i]; 3594 oj[i] 
= garray[col]; 3595 } 3596 3597 /* Set Bnew as off-diagonal portion of *mat */ 3598 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3599 bnew = (Mat_SeqAIJ*)Bnew->data; 3600 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3601 maij->B = Bnew; 3602 3603 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3604 3605 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3606 b->free_a = PETSC_FALSE; 3607 b->free_ij = PETSC_FALSE; 3608 ierr = MatDestroy(&B);CHKERRQ(ierr); 3609 3610 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3611 bnew->free_a = PETSC_TRUE; 3612 bnew->free_ij = PETSC_TRUE; 3613 3614 /* condense columns of maij->B */ 3615 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3616 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3617 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3618 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3619 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3620 PetscFunctionReturn(0); 3621 } 3622 3623 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3624 3625 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3626 { 3627 PetscErrorCode ierr; 3628 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3629 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3630 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3631 Mat M,Msub,B=a->B; 3632 MatScalar *aa; 3633 Mat_SeqAIJ *aij; 3634 PetscInt *garray = a->garray,*colsub,Ncols; 3635 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3636 IS iscol_sub,iscmap; 3637 const PetscInt *is_idx,*cmap; 3638 PetscBool allcolumns=PETSC_FALSE; 3639 MPI_Comm comm; 3640 3641 
PetscFunctionBegin; 3642 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3643 3644 if (call == MAT_REUSE_MATRIX) { 3645 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3646 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3647 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3648 3649 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3650 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3651 3652 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3653 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3654 3655 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3656 3657 } else { /* call == MAT_INITIAL_MATRIX) */ 3658 PetscBool flg; 3659 3660 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3661 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3662 3663 /* (1) iscol -> nonscalable iscol_local */ 3664 /* Check for special case: each processor gets entire matrix columns */ 3665 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3666 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3667 if (allcolumns) { 3668 iscol_sub = iscol_local; 3669 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3670 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3671 3672 } else { 3673 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3674 PetscInt *idx,*cmap1,k; 3675 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3676 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3677 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3678 count = 0; 3679 k = 0; 3680 for (i=0; i<Ncols; i++) { 3681 j = is_idx[i]; 3682 if (j >= cstart && j < cend) { 3683 /* diagonal part of mat */ 3684 idx[count] = j; 3685 cmap1[count++] = i; /* column index in submat */ 3686 } else if (Bn) { 3687 /* off-diagonal part of mat */ 3688 if (j == garray[k]) { 3689 idx[count] = j; 3690 cmap1[count++] = i; /* column index in submat */ 3691 } else if (j > garray[k]) { 3692 while (j > garray[k] && k < Bn-1) k++; 3693 if (j == garray[k]) { 3694 idx[count] = j; 3695 cmap1[count++] = i; /* column index in submat */ 3696 } 3697 } 3698 } 3699 } 3700 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3701 3702 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3703 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3704 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3705 3706 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3707 } 3708 3709 /* (3) Create sequential Msub */ 3710 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3711 } 3712 3713 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3714 aij = (Mat_SeqAIJ*)(Msub)->data; 3715 ii = aij->i; 3716 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3717 3718 /* 3719 m - number of local rows 3720 Ncols - number of columns (same on all processors) 3721 rstart - first row in new global matrix generated 3722 */ 3723 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3724 3725 if (call == MAT_INITIAL_MATRIX) { 3726 /* (4) Create parallel newmat */ 3727 PetscMPIInt rank,size; 3728 PetscInt csize; 3729 3730 
ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3731 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3732 3733 /* 3734 Determine the number of non-zeros in the diagonal and off-diagonal 3735 portions of the matrix in order to do correct preallocation 3736 */ 3737 3738 /* first get start and end of "diagonal" columns */ 3739 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3740 if (csize == PETSC_DECIDE) { 3741 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3742 if (mglobal == Ncols) { /* square matrix */ 3743 nlocal = m; 3744 } else { 3745 nlocal = Ncols/size + ((Ncols % size) > rank); 3746 } 3747 } else { 3748 nlocal = csize; 3749 } 3750 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3751 rstart = rend - nlocal; 3752 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3753 3754 /* next, compute all the lengths */ 3755 jj = aij->j; 3756 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3757 olens = dlens + m; 3758 for (i=0; i<m; i++) { 3759 jend = ii[i+1] - ii[i]; 3760 olen = 0; 3761 dlen = 0; 3762 for (j=0; j<jend; j++) { 3763 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3764 else dlen++; 3765 jj++; 3766 } 3767 olens[i] = olen; 3768 dlens[i] = dlen; 3769 } 3770 3771 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3772 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3773 3774 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3775 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 3776 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3777 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3778 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3779 ierr = PetscFree(dlens);CHKERRQ(ierr); 3780 3781 } else { /* call == MAT_REUSE_MATRIX */ 3782 M = *newmat; 3783 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3784 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be 
same size/layout as request"); 3785 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3786 /* 3787 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3788 rather than the slower MatSetValues(). 3789 */ 3790 M->was_assembled = PETSC_TRUE; 3791 M->assembled = PETSC_FALSE; 3792 } 3793 3794 /* (5) Set values of Msub to *newmat */ 3795 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3796 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3797 3798 jj = aij->j; 3799 aa = aij->a; 3800 for (i=0; i<m; i++) { 3801 row = rstart + i; 3802 nz = ii[i+1] - ii[i]; 3803 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3804 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3805 jj += nz; aa += nz; 3806 } 3807 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3808 3809 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3810 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3811 3812 ierr = PetscFree(colsub);CHKERRQ(ierr); 3813 3814 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3815 if (call == MAT_INITIAL_MATRIX) { 3816 *newmat = M; 3817 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3818 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3819 3820 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3821 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3822 3823 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3824 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3825 3826 if (iscol_local) { 3827 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3828 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3829 } 3830 } 3831 PetscFunctionReturn(0); 3832 } 3833 3834 /* 3835 Not great since it makes two copies of the submatrix, first an SeqAIJ 3836 in local and then by concatenating the local matrices the end result. 
3837 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3838 3839 Note: This requires a sequential iscol with all indices. 3840 */ 3841 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3842 { 3843 PetscErrorCode ierr; 3844 PetscMPIInt rank,size; 3845 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3846 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3847 Mat M,Mreuse; 3848 MatScalar *aa,*vwork; 3849 MPI_Comm comm; 3850 Mat_SeqAIJ *aij; 3851 PetscBool colflag,allcolumns=PETSC_FALSE; 3852 3853 PetscFunctionBegin; 3854 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3855 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3856 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3857 3858 /* Check for special case: each processor gets entire matrix columns */ 3859 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3860 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3861 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3862 3863 if (call == MAT_REUSE_MATRIX) { 3864 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3865 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3866 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3867 } else { 3868 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3869 } 3870 3871 /* 3872 m - number of local rows 3873 n - number of columns (same on all processors) 3874 rstart - first row in new global matrix generated 3875 */ 3876 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3877 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3878 if (call == MAT_INITIAL_MATRIX) { 3879 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3880 ii = aij->i; 3881 jj = aij->j; 3882 3883 /* 3884 
Determine the number of non-zeros in the diagonal and off-diagonal 3885 portions of the matrix in order to do correct preallocation 3886 */ 3887 3888 /* first get start and end of "diagonal" columns */ 3889 if (csize == PETSC_DECIDE) { 3890 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3891 if (mglobal == n) { /* square matrix */ 3892 nlocal = m; 3893 } else { 3894 nlocal = n/size + ((n % size) > rank); 3895 } 3896 } else { 3897 nlocal = csize; 3898 } 3899 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3900 rstart = rend - nlocal; 3901 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3902 3903 /* next, compute all the lengths */ 3904 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3905 olens = dlens + m; 3906 for (i=0; i<m; i++) { 3907 jend = ii[i+1] - ii[i]; 3908 olen = 0; 3909 dlen = 0; 3910 for (j=0; j<jend; j++) { 3911 if (*jj < rstart || *jj >= rend) olen++; 3912 else dlen++; 3913 jj++; 3914 } 3915 olens[i] = olen; 3916 dlens[i] = dlen; 3917 } 3918 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3919 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3920 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3921 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3922 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3923 ierr = PetscFree(dlens);CHKERRQ(ierr); 3924 } else { 3925 PetscInt ml,nl; 3926 3927 M = *newmat; 3928 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3929 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3930 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3931 /* 3932 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3933 rather than the slower MatSetValues(). 
3934 */ 3935 M->was_assembled = PETSC_TRUE; 3936 M->assembled = PETSC_FALSE; 3937 } 3938 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3939 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3940 ii = aij->i; 3941 jj = aij->j; 3942 aa = aij->a; 3943 for (i=0; i<m; i++) { 3944 row = rstart + i; 3945 nz = ii[i+1] - ii[i]; 3946 cwork = jj; jj += nz; 3947 vwork = aa; aa += nz; 3948 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3949 } 3950 3951 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3952 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3953 *newmat = M; 3954 3955 /* save submatrix used in processor for next request */ 3956 if (call == MAT_INITIAL_MATRIX) { 3957 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3958 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3959 } 3960 PetscFunctionReturn(0); 3961 } 3962 3963 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3964 { 3965 PetscInt m,cstart, cend,j,nnz,i,d; 3966 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3967 const PetscInt *JJ; 3968 PetscErrorCode ierr; 3969 PetscBool nooffprocentries; 3970 3971 PetscFunctionBegin; 3972 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3973 3974 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3975 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3976 m = B->rmap->n; 3977 cstart = B->cmap->rstart; 3978 cend = B->cmap->rend; 3979 rstart = B->rmap->rstart; 3980 3981 ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3982 3983 #if defined(PETSC_USE_DEBUG) 3984 for (i=0; i<m; i++) { 3985 nnz = Ii[i+1]- Ii[i]; 3986 JJ = J + Ii[i]; 3987 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3988 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column 
index",i,JJ[0]); 3989 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3990 } 3991 #endif 3992 3993 for (i=0; i<m; i++) { 3994 nnz = Ii[i+1]- Ii[i]; 3995 JJ = J + Ii[i]; 3996 nnz_max = PetscMax(nnz_max,nnz); 3997 d = 0; 3998 for (j=0; j<nnz; j++) { 3999 if (cstart <= JJ[j] && JJ[j] < cend) d++; 4000 } 4001 d_nnz[i] = d; 4002 o_nnz[i] = nnz - d; 4003 } 4004 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 4005 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 4006 4007 for (i=0; i<m; i++) { 4008 ii = i + rstart; 4009 ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr); 4010 } 4011 nooffprocentries = B->nooffprocentries; 4012 B->nooffprocentries = PETSC_TRUE; 4013 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4014 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4015 B->nooffprocentries = nooffprocentries; 4016 4017 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 4018 PetscFunctionReturn(0); 4019 } 4020 4021 /*@ 4022 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 4023 (the default parallel PETSc format). 4024 4025 Collective 4026 4027 Input Parameters: 4028 + B - the matrix 4029 . i - the indices into j for the start of each local row (starts with zero) 4030 . j - the column indices for each local row (starts with zero) 4031 - v - optional values in the matrix 4032 4033 Level: developer 4034 4035 Notes: 4036 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 4037 thus you CANNOT change the matrix entries by changing the values of v[] after you have 4038 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 
4039 4040 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4041 4042 The format which is used for the sparse matrix input, is equivalent to a 4043 row-major ordering.. i.e for the following matrix, the input data expected is 4044 as shown 4045 4046 $ 1 0 0 4047 $ 2 0 3 P0 4048 $ ------- 4049 $ 4 5 6 P1 4050 $ 4051 $ Process0 [P0]: rows_owned=[0,1] 4052 $ i = {0,1,3} [size = nrow+1 = 2+1] 4053 $ j = {0,0,2} [size = 3] 4054 $ v = {1,2,3} [size = 3] 4055 $ 4056 $ Process1 [P1]: rows_owned=[2] 4057 $ i = {0,3} [size = nrow+1 = 1+1] 4058 $ j = {0,1,2} [size = 3] 4059 $ v = {4,5,6} [size = 3] 4060 4061 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 4062 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 4063 @*/ 4064 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 4065 { 4066 PetscErrorCode ierr; 4067 4068 PetscFunctionBegin; 4069 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 4070 PetscFunctionReturn(0); 4071 } 4072 4073 /*@C 4074 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 4075 (the default parallel PETSc format). For good matrix assembly performance 4076 the user should preallocate the matrix storage by setting the parameters 4077 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4078 performance can be increased by more than a factor of 50. 4079 4080 Collective 4081 4082 Input Parameters: 4083 + B - the matrix 4084 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4085 (same value is used for all local rows) 4086 . 
   d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e 'm'.
           For matrices that will be factored, you must leave room for (and set)
           the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e 'm'.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)), is fully compatible with standard Fortran 77
   storage.  The stored row and column indices begin with zero.
   See Users-Manual: ch_mat for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to. This is an mxn matrix. In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1] and o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e 34, and
   hence pre-allocation is perfect.
   Level: intermediate

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  /* dispatch to the implementation composed on this matrix type, if any */
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
   CSR format for the local rows.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

       The format which is used for the sparse matrix input, is equivalent to a
    row-major ordering, i.e. for the following matrix, the input data expected is
    as shown

       Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays()

$        1 0 0
$        2 0 3     P0
$       -------
$        4 5 6     P1
$
$     Process0 [P0]: rows_owned=[0,1]
$        i =  {0,1,3}  [size = nrow+1  = 2+1]
$        j =  {0,0,2}  [size = 3]
$        v =  {1,2,3}  [size = 3]
$
$     Process1 [P1]: rows_owned=[2]
$        i =  {0,3}    [size = nrow+1  = 1+1]
$        j =  {0,1,2}  [size = 3]
$        v =  {4,5,6}  [size = 3]

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* local CSR row pointers must start at zero, and the local row count must be explicit */
  if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatUpdateMPIAIJWithArrays - updates a MPI AIJ matrix using arrays that contain in standard
   CSR format for the local rows.
Only the numerical values are updated the other arrays must be identical 4285 4286 Collective 4287 4288 Input Parameters: 4289 + mat - the matrix 4290 . m - number of local rows (Cannot be PETSC_DECIDE) 4291 . n - This value should be the same as the local size used in creating the 4292 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4293 calculated if N is given) For square matrices n is almost always m. 4294 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4295 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4296 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4297 . J - column indices 4298 - v - matrix values 4299 4300 Level: intermediate 4301 4302 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4303 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4304 @*/ 4305 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 4306 { 4307 PetscErrorCode ierr; 4308 PetscInt cstart,nnz,i,j; 4309 PetscInt *ld; 4310 PetscBool nooffprocentries; 4311 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)mat->data; 4312 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)Aij->A->data, *Ao = (Mat_SeqAIJ*)Aij->B->data; 4313 PetscScalar *ad = Ad->a, *ao = Ao->a; 4314 const PetscInt *Adi = Ad->i; 4315 PetscInt ldi,Iii,md; 4316 4317 PetscFunctionBegin; 4318 if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4319 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4320 if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4321 if (n != mat->cmap->n) 
SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4322 4323 cstart = mat->cmap->rstart; 4324 if (!Aij->ld) { 4325 /* count number of entries below block diagonal */ 4326 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 4327 Aij->ld = ld; 4328 for (i=0; i<m; i++) { 4329 nnz = Ii[i+1]- Ii[i]; 4330 j = 0; 4331 while (J[j] < cstart && j < nnz) {j++;} 4332 J += nnz; 4333 ld[i] = j; 4334 } 4335 } else { 4336 ld = Aij->ld; 4337 } 4338 4339 for (i=0; i<m; i++) { 4340 nnz = Ii[i+1]- Ii[i]; 4341 Iii = Ii[i]; 4342 ldi = ld[i]; 4343 md = Adi[i+1]-Adi[i]; 4344 ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr); 4345 ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr); 4346 ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr); 4347 ad += md; 4348 ao += nnz - md; 4349 } 4350 nooffprocentries = mat->nooffprocentries; 4351 mat->nooffprocentries = PETSC_TRUE; 4352 ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr); 4353 ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr); 4354 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 4355 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4356 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4357 mat->nooffprocentries = nooffprocentries; 4358 PetscFunctionReturn(0); 4359 } 4360 4361 /*@C 4362 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4363 (the default parallel PETSc format). For good matrix assembly performance 4364 the user should preallocate the matrix storage by setting the parameters 4365 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4366 performance can be increased by more than a factor of 50. 4367 4368 Collective 4369 4370 Input Parameters: 4371 + comm - MPI communicator 4372 . 
m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e 'm'.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e 'm'.

   Output Parameter:
.  A - the matrix

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2 etc.. where
   m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
   values corresponding to [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to 0th partition, the next n1 columns belonging to the next
   partition etc.. where n0,n1,n2... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   corresponding to the given processor. i.e diagonal matrix on
   process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitute the OFF-DIAGONAL portion. The example below better
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism
.vb
   MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
.ve

$     MatCreate(...,&A);
$     MatSetType(A,MATMPIAIJ);
$     MatSetSizes(A, m,n,M,N);
$     MatMPIAIJSetPreallocation(A,...);

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode  - Do not use inodes
-  -mat_inode_limit <limit> - Sets inode limit (max limit=5)

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1] and o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e 34, and
   hence pre-allocation is perfect.
4530 4531 Level: intermediate 4532 4533 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4534 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4535 @*/ 4536 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4537 { 4538 PetscErrorCode ierr; 4539 PetscMPIInt size; 4540 4541 PetscFunctionBegin; 4542 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4543 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4544 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4545 if (size > 1) { 4546 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4547 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4548 } else { 4549 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4550 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4551 } 4552 PetscFunctionReturn(0); 4553 } 4554 4555 /*@C 4556 MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix 4557 4558 Not collective 4559 4560 Input Parameter: 4561 . A - The MPIAIJ matrix 4562 4563 Output Parameters: 4564 + Ad - The local diagonal block as a SeqAIJ matrix 4565 . Ao - The local off-diagonal block as a SeqAIJ matrix 4566 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix 4567 4568 Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns 4569 in Ad are in [0, Nc) where Nc is the number of local columns. The columns are Ao are in [0, Nco), where Nco is 4570 the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these 4571 local column numbers to global column numbers in the original matrix. 
   Level: intermediate

.seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ
@*/
PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* prefix match: any type whose name begins with "mpiaij" is accepted */
  ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
  if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
  /* each output is optional; NULL means "not requested" */
  if (Ad) *Ad = a->A;
  if (Ao) *Ao = a->B;
  if (colmap) *colmap = a->garray;
  PetscFunctionReturn(0);
}

/* Builds (or refills, for scall == MAT_REUSE_MATRIX) a parallel AIJ matrix whose local
   rows are the rows of this process's sequential matrix inmat; n is the local column
   size of the result or PETSC_DECIDE */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt *dnz,*onz,sum,bs,cbs;

    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    /* Check sum(n) = N */
    ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);

    /* prefix sum of local row counts gives this process's first global row */
    ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart -= m;

    ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
      ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
    }

    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
    /* both preallocations are called; only the one matching the actual type takes effect */
    ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  }

  /* numeric phase */
  ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;
    ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Appends each process's local rows of A (as a sequential matrix) to its own
   binary file "<outfile>.<rank>" */
PetscErrorCode MatFileSplit(Mat A,char *outfile)
{
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  PetscInt          m,N,i,rstart,nnz;
  size_t            len;
  const PetscInt    *indx;
  PetscViewer       out;
  char              *name;
  Mat               B;
  const PetscScalar *values;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
  ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
  /* Should this be the type of the diagonal block of A?
*/ 4660 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4661 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4662 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4663 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4664 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4665 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4666 for (i=0; i<m; i++) { 4667 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4668 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4669 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4670 } 4671 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4672 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4673 4674 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4675 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4676 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 4677 sprintf(name,"%s.%d",outfile,rank); 4678 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4679 ierr = PetscFree(name);CHKERRQ(ierr); 4680 ierr = MatView(B,out);CHKERRQ(ierr); 4681 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4682 ierr = MatDestroy(&B);CHKERRQ(ierr); 4683 PetscFunctionReturn(0); 4684 } 4685 4686 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4687 { 4688 PetscErrorCode ierr; 4689 Mat_Merge_SeqsToMPI *merge; 4690 PetscContainer container; 4691 4692 PetscFunctionBegin; 4693 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4694 if (container) { 4695 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4696 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4697 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4698 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4699 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4700 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4701 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4702 ierr = 
PetscFree(merge->buf_ri);CHKERRQ(ierr); 4703 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4704 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4705 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4706 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4707 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4708 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4709 ierr = PetscFree(merge);CHKERRQ(ierr); 4710 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4711 } 4712 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4713 PetscFunctionReturn(0); 4714 } 4715 4716 #include <../src/mat/utils/freespace.h> 4717 #include <petscbt.h> 4718 4719 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4720 { 4721 PetscErrorCode ierr; 4722 MPI_Comm comm; 4723 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4724 PetscMPIInt size,rank,taga,*len_s; 4725 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4726 PetscInt proc,m; 4727 PetscInt **buf_ri,**buf_rj; 4728 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4729 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4730 MPI_Request *s_waits,*r_waits; 4731 MPI_Status *status; 4732 MatScalar *aa=a->a; 4733 MatScalar **abuf_r,*ba_i; 4734 Mat_Merge_SeqsToMPI *merge; 4735 PetscContainer container; 4736 4737 PetscFunctionBegin; 4738 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4739 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4740 4741 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4742 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4743 4744 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4745 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4746 4747 bi = merge->bi; 4748 bj = merge->bj; 4749 buf_ri = merge->buf_ri; 4750 buf_rj = merge->buf_rj; 4751 4752 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4753 owners = merge->rowmap->range; 4754 len_s = merge->len_s; 4755 4756 /* 
send and recv matrix values */
  /*-----------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* send the values of the rows owned by [proc] directly out of seqmat's storage */
    i    = owners[proc];
    ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
    k++;
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*----------------------------*/
  ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
  ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i;
    bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr);

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a->a + ai[arow];
    nextaj = 0;
    /* merge: both aj and bj_i are sorted, and aj's columns are a subset of bj_i's */
    for (j=0; nextaj<anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k]+1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++; nextai[k]++;
      }
    }
    ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(ba_i);CHKERRQ(ierr);
  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Symbolic phase: determines the nonzero structure of the parallel matrix formed by
   summing the per-process sequential matrices seqmat, and stores the merge context
   on the result for use by MatCreateMPIAIJSumSeqAIJNumeric() */
PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
{
  PetscErrorCode      ierr;
  Mat                 B_mpi;
  Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
  PetscInt            len,proc,*dnz,*onz,bs,cbs;
  PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
  PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
  MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status          *status;
  PetscFreeSpaceList  free_space=NULL,current_space=NULL;
  PetscBT             lnkbt;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);

  /* make sure it is a PETSc comm */
ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4859 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4860 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4861 4862 ierr = PetscNew(&merge);CHKERRQ(ierr); 4863 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4864 4865 /* determine row ownership */ 4866 /*---------------------------------------------------------*/ 4867 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4868 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4869 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4870 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4871 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4872 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4873 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4874 4875 m = merge->rowmap->n; 4876 owners = merge->rowmap->range; 4877 4878 /* determine the number of messages to send, their lengths */ 4879 /*---------------------------------------------------------*/ 4880 len_s = merge->len_s; 4881 4882 len = 0; /* length of buf_si[] */ 4883 merge->nsend = 0; 4884 for (proc=0; proc<size; proc++) { 4885 len_si[proc] = 0; 4886 if (proc == rank) { 4887 len_s[proc] = 0; 4888 } else { 4889 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4890 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4891 } 4892 if (len_s[proc]) { 4893 merge->nsend++; 4894 nrows = 0; 4895 for (i=owners[proc]; i<owners[proc+1]; i++) { 4896 if (ai[i+1] > ai[i]) nrows++; 4897 } 4898 len_si[proc] = 2*(nrows+1); 4899 len += len_si[proc]; 4900 } 4901 } 4902 4903 /* determine the number and length of messages to receive for ij-structure */ 4904 /*-------------------------------------------------------------------------*/ 4905 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4906 ierr = 
PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4907 4908 /* post the Irecv of j-structure */ 4909 /*-------------------------------*/ 4910 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4911 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4912 4913 /* post the Isend of j-structure */ 4914 /*--------------------------------*/ 4915 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4916 4917 for (proc=0, k=0; proc<size; proc++) { 4918 if (!len_s[proc]) continue; 4919 i = owners[proc]; 4920 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4921 k++; 4922 } 4923 4924 /* receives and sends of j-structure are complete */ 4925 /*------------------------------------------------*/ 4926 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4927 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4928 4929 /* send and recv i-structure */ 4930 /*---------------------------*/ 4931 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4932 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4933 4934 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4935 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4936 for (proc=0,k=0; proc<size; proc++) { 4937 if (!len_s[proc]) continue; 4938 /* form outgoing message for i-structure: 4939 buf_si[0]: nrows to be sent 4940 [1:nrows]: row index (global) 4941 [nrows+1:2*nrows+1]: i-structure index 4942 */ 4943 /*-------------------------------------------*/ 4944 nrows = len_si[proc]/2 - 1; 4945 buf_si_i = buf_si + nrows+1; 4946 buf_si[0] = nrows; 4947 buf_si_i[0] = 0; 4948 nrows = 0; 4949 for (i=owners[proc]; i<owners[proc+1]; i++) { 4950 anzi = ai[i+1] - ai[i]; 4951 if (anzi) { 4952 buf_si_i[nrows+1] = 
buf_si_i[nrows] + anzi; /* i-structure */ 4953 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4954 nrows++; 4955 } 4956 } 4957 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4958 k++; 4959 buf_si += len_si[proc]; 4960 } 4961 4962 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4963 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4964 4965 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4966 for (i=0; i<merge->nrecv; i++) { 4967 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4968 } 4969 4970 ierr = PetscFree(len_si);CHKERRQ(ierr); 4971 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4972 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4973 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4974 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4975 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4976 ierr = PetscFree(status);CHKERRQ(ierr); 4977 4978 /* compute a local seq matrix in each processor */ 4979 /*----------------------------------------------*/ 4980 /* allocate bi array and free space for accumulating nonzero column info */ 4981 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4982 bi[0] = 0; 4983 4984 /* create and initialize a linked list */ 4985 nlnk = N+1; 4986 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4987 4988 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4989 len = ai[owners[rank+1]] - ai[owners[rank]]; 4990 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4991 4992 current_space = free_space; 4993 4994 /* determine symbolic info for each local row */ 4995 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4996 4997 for (k=0; k<merge->nrecv; k++) { 4998 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4999 nrows = 
*buf_ri_k[k]; 5000 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 5001 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 5002 } 5003 5004 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 5005 len = 0; 5006 for (i=0; i<m; i++) { 5007 bnzi = 0; 5008 /* add local non-zero cols of this proc's seqmat into lnk */ 5009 arow = owners[rank] + i; 5010 anzi = ai[arow+1] - ai[arow]; 5011 aj = a->j + ai[arow]; 5012 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 5013 bnzi += nlnk; 5014 /* add received col data into lnk */ 5015 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 5016 if (i == *nextrow[k]) { /* i-th row */ 5017 anzi = *(nextai[k]+1) - *nextai[k]; 5018 aj = buf_rj[k] + *nextai[k]; 5019 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 5020 bnzi += nlnk; 5021 nextrow[k]++; nextai[k]++; 5022 } 5023 } 5024 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 5025 5026 /* if free space is not available, make more free space */ 5027 if (current_space->local_remaining<bnzi) { 5028 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 5029 nspacedouble++; 5030 } 5031 /* copy data into free space, then initialize lnk */ 5032 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 5033 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 5034 5035 current_space->array += bnzi; 5036 current_space->local_used += bnzi; 5037 current_space->local_remaining -= bnzi; 5038 5039 bi[i+1] = bi[i] + bnzi; 5040 } 5041 5042 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 5043 5044 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 5045 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 5046 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 5047 5048 /* create symbolic parallel matrix B_mpi */ 5049 
/*---------------------------------------*/ 5050 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 5051 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 5052 if (n==PETSC_DECIDE) { 5053 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 5054 } else { 5055 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5056 } 5057 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 5058 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 5059 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 5060 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 5061 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5062 5063 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5064 B_mpi->assembled = PETSC_FALSE; 5065 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 5066 merge->bi = bi; 5067 merge->bj = bj; 5068 merge->buf_ri = buf_ri; 5069 merge->buf_rj = buf_rj; 5070 merge->coi = NULL; 5071 merge->coj = NULL; 5072 merge->owners_co = NULL; 5073 5074 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 5075 5076 /* attach the supporting struct to B_mpi for reuse */ 5077 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 5078 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 5079 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 5080 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 5081 *mpimat = B_mpi; 5082 5083 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 5084 PetscFunctionReturn(0); 5085 } 5086 5087 /*@C 5088 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 5089 matrices from each processor 5090 5091 Collective 5092 5093 Input Parameters: 5094 + comm - the communicators the parallel matrix will live on 5095 . seqmat - the input sequential matrices 5096 . m - number of local rows (or PETSC_DECIDE) 5097 . 
n - number of local columns (or PETSC_DECIDE) 5098 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5099 5100 Output Parameter: 5101 . mpimat - the parallel matrix generated 5102 5103 Level: advanced 5104 5105 Notes: 5106 The dimensions of the sequential matrix in each processor MUST be the same. 5107 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5108 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 5109 @*/ 5110 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 5111 { 5112 PetscErrorCode ierr; 5113 PetscMPIInt size; 5114 5115 PetscFunctionBegin; 5116 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5117 if (size == 1) { 5118 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5119 if (scall == MAT_INITIAL_MATRIX) { 5120 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 5121 } else { 5122 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5123 } 5124 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5125 PetscFunctionReturn(0); 5126 } 5127 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5128 if (scall == MAT_INITIAL_MATRIX) { 5129 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 5130 } 5131 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 5132 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5133 PetscFunctionReturn(0); 5134 } 5135 5136 /*@ 5137 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5138 mlocal rows and n columns. 
Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 5139 with MatGetSize() 5140 5141 Not Collective 5142 5143 Input Parameters: 5144 + A - the matrix 5145 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5146 5147 Output Parameter: 5148 . A_loc - the local sequential matrix generated 5149 5150 Level: developer 5151 5152 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed() 5153 5154 @*/ 5155 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5156 { 5157 PetscErrorCode ierr; 5158 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5159 Mat_SeqAIJ *mat,*a,*b; 5160 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 5161 MatScalar *aa,*ba,*cam; 5162 PetscScalar *ca; 5163 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5164 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5165 PetscBool match; 5166 MPI_Comm comm; 5167 PetscMPIInt size; 5168 5169 PetscFunctionBegin; 5170 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr); 5171 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5172 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5173 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5174 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 5175 5176 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5177 a = (Mat_SeqAIJ*)(mpimat->A)->data; 5178 b = (Mat_SeqAIJ*)(mpimat->B)->data; 5179 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 5180 aa = a->a; ba = b->a; 5181 if (scall == MAT_INITIAL_MATRIX) { 5182 if (size == 1) { 5183 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 5184 PetscFunctionReturn(0); 5185 } 5186 5187 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 5188 ci[0] = 0; 5189 for (i=0; i<am; i++) { 5190 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5191 } 5192 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 5193 
ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 5194 k = 0; 5195 for (i=0; i<am; i++) { 5196 ncols_o = bi[i+1] - bi[i]; 5197 ncols_d = ai[i+1] - ai[i]; 5198 /* off-diagonal portion of A */ 5199 for (jo=0; jo<ncols_o; jo++) { 5200 col = cmap[*bj]; 5201 if (col >= cstart) break; 5202 cj[k] = col; bj++; 5203 ca[k++] = *ba++; 5204 } 5205 /* diagonal portion of A */ 5206 for (j=0; j<ncols_d; j++) { 5207 cj[k] = cstart + *aj++; 5208 ca[k++] = *aa++; 5209 } 5210 /* off-diagonal portion of A */ 5211 for (j=jo; j<ncols_o; j++) { 5212 cj[k] = cmap[*bj++]; 5213 ca[k++] = *ba++; 5214 } 5215 } 5216 /* put together the new matrix */ 5217 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5218 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5219 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5220 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5221 mat->free_a = PETSC_TRUE; 5222 mat->free_ij = PETSC_TRUE; 5223 mat->nonew = 0; 5224 } else if (scall == MAT_REUSE_MATRIX) { 5225 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5226 ci = mat->i; cj = mat->j; cam = mat->a; 5227 for (i=0; i<am; i++) { 5228 /* off-diagonal portion of A */ 5229 ncols_o = bi[i+1] - bi[i]; 5230 for (jo=0; jo<ncols_o; jo++) { 5231 col = cmap[*bj]; 5232 if (col >= cstart) break; 5233 *cam++ = *ba++; bj++; 5234 } 5235 /* diagonal portion of A */ 5236 ncols_d = ai[i+1] - ai[i]; 5237 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5238 /* off-diagonal portion of A */ 5239 for (j=jo; j<ncols_o; j++) { 5240 *cam++ = *ba++; bj++; 5241 } 5242 } 5243 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5244 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5245 PetscFunctionReturn(0); 5246 } 5247 5248 /*@C 5249 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5250 5251 Not Collective 5252 5253 Input Parameters: 
+    A - the matrix
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    row, col - index sets of rows and columns to extract (or NULL)

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()

@*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS             isrowa,iscola;
  Mat            *aloc;
  PetscBool      match;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
  ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  if (!row) {
    /* default rows: all locally owned rows of A */
    start = A->rmap->rstart; end = A->rmap->rend;
    ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  if (!col) {
    /* default columns: global indices of all columns with local nonzeros,
       in ascending order: garray entries below cstart, then the diagonal-block
       columns [cstart,cstart+nzA), then the remaining garray entries */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    /* MatCreateSubMatrices expects the matrix to be reused inside the array argument */
    ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  if (!col) { /* attach global id of condensed columns */
    ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
  }
  *A_loc = aloc[0];
  ierr   = PetscFree(aloc);CHKERRQ(ierr);
  if (!row) {
    ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
  }
  if (!col) {
    /* iscola was composed onto the submatrix above, which took a reference,
       so this destroy only drops our local reference */
    ierr = ISDestroy(&iscola);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
 * Destroy a mat that may be composed with PetscSF communication objects.
 * The SF objects were created in MatCreateSeqSubMatrixWithRows_Private.
 * */
PetscErrorCode MatDestroy_SeqAIJ_PetscSF(Mat mat)
{
  PetscSF        sf,osf;
  IS             map;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* release the diagonal/off-diagonal SFs and the column-mapping IS attached
     by MatCreateSeqSubMatrixWithRows_Private / MatGetBrowsOfAcols_MPIXAIJ,
     then fall through to the regular SeqAIJ destroy */
  ierr = PetscObjectQuery((PetscObject)mat,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
  ierr = PetscObjectQuery((PetscObject)mat,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&osf);CHKERRQ(ierr);
  ierr = PetscObjectQuery((PetscObject)mat,"aoffdiagtopothmapping",(PetscObject*)&map);CHKERRQ(ierr);
  ierr = ISDestroy(&map);CHKERRQ(ierr);
  ierr = MatDestroy_SeqAIJ(mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
 * Create a sequential AIJ matrix based on row indices; a whole row is extracted once it is matched.
 * Rows may be local or remote. The routine is designed to be scalable in memory so that nothing is based
 * on a global size.
 * */
PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
{
  Mat_MPIAIJ             *p=(Mat_MPIAIJ*)P->data;
  Mat_SeqAIJ             *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
  PetscInt               plocalsize,nrows,*ilocal,*oilocal,i,owner,lidx,*nrcols,*nlcols,ncol;
  PetscSFNode            *iremote,*oiremote;
  const PetscInt         *lrowindices;
  PetscErrorCode         ierr;
  PetscSF                sf,osf;
  PetscInt               pcstart,*roffsets,*loffsets,*pnnz,j;
  PetscInt               ontotalcols,dntotalcols,ntotalcols,nout;
  MPI_Comm               comm;
  ISLocalToGlobalMapping mapping;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr);
  /* plocalsize is the number of roots
   * nrows is the number of leaves
   * */
  ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr);
  ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr);
  ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr);
  ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr);
  for (i=0;i<nrows;i++) {
    /* Find a remote index and an owner for a row
     * The row could be local or remote
     * */
    owner = 0;
    lidx  = 0;
    ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr);
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create SF to communicate how many nonzero columns for each row */
  ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
  /* SF will figure out the number of nonzero columns for each row, and their
   * offsets
   * */
  ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);

  /* per-row counts and running offsets, interleaved as (diag,offdiag) pairs */
  ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr);
  ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr);
  ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr);
  roffsets[0] = 0;
  roffsets[1] = 0;
  for (i=0;i<plocalsize;i++) {
    /* diag */
    nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
    /* off diag */
    nrcols[i*2+1] = po->i[i+1] - po->i[i];
    /* compute offsets so that we have the relative location for each row */
    roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
    roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
  }
  ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr);
  ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr);
  /* 'r' means root, and 'l' means leaf */
  ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  ierr = PetscFree(roffsets);CHKERRQ(ierr);
  ierr = PetscFree(nrcols);CHKERRQ(ierr);
  dntotalcols = 0;
  ontotalcols = 0;
  ncol        = 0;
  for (i=0;i<nrows;i++) {
    pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
    ncol    = PetscMax(pnnz[i],ncol);
    /* diag */
    dntotalcols += nlcols[i*2+0];
    /* off diag */
    ontotalcols += nlcols[i*2+1];
  }
  /* We do not need to figure out the right number of columns
   * since all the calculations will be done by going through the raw data
   * */
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr);
  ierr = MatSetUp(*P_oth);CHKERRQ(ierr);
  ierr = PetscFree(pnnz);CHKERRQ(ierr);
  p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
  /* diag */
  ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr);
  /* off diag */
  ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr);
  /* diag */
  ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr);
  /* off diag */
  ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr);
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  for (i=0;i<nrows;i++) {
    owner = 0;
    ierr  = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr);
    /* Set iremote for diag matrix */
    for (j=0;j<nlcols[i*2+0];j++) {
      iremote[dntotalcols].index = loffsets[i*2+0] + j;
      iremote[dntotalcols].rank  = owner;
      /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
      ilocal[dntotalcols++] = ntotalcols++;
    }
    /* off diag */
    for (j=0;j<nlcols[i*2+1];j++) {
      oiremote[ontotalcols].index = loffsets[i*2+1] + j;
      oiremote[ontotalcols].rank  = owner;
      oilocal[ontotalcols++] = ntotalcols++;
    }
  }
  ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr);
  ierr = PetscFree(loffsets);CHKERRQ(ierr);
  ierr = PetscFree(nlcols);CHKERRQ(ierr);
  ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
  /* P serves as roots and P_oth is leaves
   * Diag matrix
   * */
  ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);

  ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr);
  /* Off diag */
  ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr);
  ierr = PetscSFSetUp(osf);CHKERRQ(ierr);
  /* We operate on the matrix internal data for saving memory */
  ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
  ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr);
  /* Convert to global indices for diag matrix (temporarily mutates pd->j in place;
     restored below after the broadcast has been posted) */
  for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
  ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
  /* We want P_oth to store global indices */
  ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr);
  /* Use memory scalable approach */
  ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr);
  /* po->j is likewise mapped to global indices in place and mapped back below */
  ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
  /* Convert back to local indices */
  for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
  ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
  nout = 0;
  ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr);
  if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal to nout %D \n",po->i[plocalsize],nout);
  ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr);
  /* Exchange values */
  ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
  /* Stop PETSc from shrinking memory */
  for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
  ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  /* Attach PetscSF objects to P_oth so that we can reuse it later */
  ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr);
  /* ``New MatDestroy" takes care of PetscSF objects as well */
  (*P_oth)->ops->destroy = MatDestroy_SeqAIJ_PetscSF;
  PetscFunctionReturn(0);
}

/*
 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
 * This supports MPIAIJ and MAIJ
 * */
PetscErrorCode
MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth) 5518 { 5519 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data; 5520 Mat_SeqAIJ *p_oth; 5521 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data; 5522 IS rows,map; 5523 PetscHMapI hamp; 5524 PetscInt i,htsize,*rowindices,off,*mapping,key,count; 5525 MPI_Comm comm; 5526 PetscSF sf,osf; 5527 PetscBool has; 5528 PetscErrorCode ierr; 5529 5530 PetscFunctionBegin; 5531 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5532 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5533 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5534 * and then create a submatrix (that often is an overlapping matrix) 5535 * */ 5536 if (reuse==MAT_INITIAL_MATRIX) { 5537 /* Use a hash table to figure out unique keys */ 5538 ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr); 5539 ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr); 5540 ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr); 5541 count = 0; 5542 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5543 for (i=0;i<a->B->cmap->n;i++) { 5544 key = a->garray[i]/dof; 5545 ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr); 5546 if (!has) { 5547 mapping[i] = count; 5548 ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr); 5549 } else { 5550 /* Current 'i' has the same value the previous step */ 5551 mapping[i] = count-1; 5552 } 5553 } 5554 ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr); 5555 ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr); 5556 if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count);CHKERRQ(ierr); 5557 ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr); 5558 off = 0; 5559 ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr); 5560 ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr); 5561 ierr = 
PetscSortInt(htsize,rowindices);CHKERRQ(ierr); 5562 ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr); 5563 /* In case, the matrix was already created but users want to recreate the matrix */ 5564 ierr = MatDestroy(P_oth);CHKERRQ(ierr); 5565 ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr); 5566 ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr); 5567 ierr = ISDestroy(&rows);CHKERRQ(ierr); 5568 } else if (reuse==MAT_REUSE_MATRIX) { 5569 /* If matrix was already created, we simply update values using SF objects 5570 * that as attached to the matrix ealier. 5571 * */ 5572 ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr); 5573 ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr); 5574 if (!sf || !osf) { 5575 SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet \n"); 5576 } 5577 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5578 /* Update values in place */ 5579 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5580 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5581 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5582 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5583 } else { 5584 SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type \n"); 5585 } 5586 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5587 PetscFunctionReturn(0); 5588 } 5589 5590 /*@C 5591 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5592 5593 Collective on Mat 5594 5595 Input Parameters: 5596 + A,B - the matrices in mpiaij format 5597 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    rowb, colb - index sets of rows and columns of B to extract (or NULL)

   Output Parameter:
+    rowb, colb - index sets of rows and columns of B to extract
-    B_seq - the sequential matrix generated

    Level: developer

@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq=NULL;

  PetscFunctionBegin;
  /* A*B requires A's column layout to match B's row layout */
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX) {
    /* build, in ascending global order, the rows of B touched by local A:
       garray entries below the diagonal block, the diagonal-block rows, then the rest */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
    ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb; iscolb = *colb;
    /* MatCreateSubMatrices expects the reused matrix inside an array argument */
    ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr   = PetscFree(bseq);CHKERRQ(ierr);
  if (!rowb) {
    ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
    of the OFF-DIAGONAL portion of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
+    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
.    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
.    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
-    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N

    Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
     for this matrix. This is not desirable..
5678 5679 Level: developer 5680 5681 */ 5682 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5683 { 5684 PetscErrorCode ierr; 5685 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5686 Mat_SeqAIJ *b_oth; 5687 VecScatter ctx; 5688 MPI_Comm comm; 5689 const PetscMPIInt *rprocs,*sprocs; 5690 const PetscInt *srow,*rstarts,*sstarts; 5691 PetscInt *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5692 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = 0,*sstartsj,len; 5693 PetscScalar *b_otha,*bufa,*bufA,*vals = NULL; 5694 MPI_Request *rwaits = NULL,*swaits = NULL; 5695 MPI_Status rstatus; 5696 PetscMPIInt jj,size,tag,rank,nsends_mpi,nrecvs_mpi; 5697 5698 PetscFunctionBegin; 5699 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5700 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5701 5702 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5703 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5704 } 5705 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5706 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5707 5708 if (size == 1) { 5709 startsj_s = NULL; 5710 bufa_ptr = NULL; 5711 *B_oth = NULL; 5712 PetscFunctionReturn(0); 5713 } 5714 5715 ctx = a->Mvctx; 5716 tag = ((PetscObject)ctx)->tag; 5717 5718 if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use"); 5719 ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5720 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5721 ierr = 
VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr); 5722 ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr); 5723 ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr); 5724 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5725 5726 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5727 if (scall == MAT_INITIAL_MATRIX) { 5728 /* i-array */ 5729 /*---------*/ 5730 /* post receives */ 5731 if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */ 5732 for (i=0; i<nrecvs; i++) { 5733 rowlen = rvalues + rstarts[i]*rbs; 5734 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5735 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5736 } 5737 5738 /* pack the outgoing message */ 5739 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5740 5741 sstartsj[0] = 0; 5742 rstartsj[0] = 0; 5743 len = 0; /* total length of j or a array to be sent */ 5744 if (nsends) { 5745 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5746 ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr); 5747 } 5748 for (i=0; i<nsends; i++) { 5749 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5750 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5751 for (j=0; j<nrows; j++) { 5752 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5753 for (l=0; l<sbs; l++) { 5754 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5755 5756 rowlen[j*sbs+l] = ncols; 5757 5758 len += ncols; 5759 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5760 } 5761 k++; 5762 } 5763 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5764 5765 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in 
bufj and bufa */ 5766 } 5767 /* recvs and sends of i-array are completed */ 5768 i = nrecvs; 5769 while (i--) { 5770 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5771 } 5772 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5773 ierr = PetscFree(svalues);CHKERRQ(ierr); 5774 5775 /* allocate buffers for sending j and a arrays */ 5776 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5777 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5778 5779 /* create i-array of B_oth */ 5780 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5781 5782 b_othi[0] = 0; 5783 len = 0; /* total length of j or a array to be received */ 5784 k = 0; 5785 for (i=0; i<nrecvs; i++) { 5786 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 5787 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 5788 for (j=0; j<nrows; j++) { 5789 b_othi[k+1] = b_othi[k] + rowlen[j]; 5790 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5791 k++; 5792 } 5793 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5794 } 5795 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5796 5797 /* allocate space for j and a arrrays of B_oth */ 5798 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5799 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5800 5801 /* j-array */ 5802 /*---------*/ 5803 /* post receives of j-array */ 5804 for (i=0; i<nrecvs; i++) { 5805 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5806 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5807 } 5808 5809 /* pack the outgoing message j-array */ 5810 if (nsends) k = sstarts[0]; 5811 for (i=0; i<nsends; i++) { 5812 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5813 bufJ = bufj+sstartsj[i]; 5814 for (j=0; j<nrows; j++) { 5815 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5816 for (ll=0; ll<sbs; ll++) { 5817 ierr = 
MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5818 for (l=0; l<ncols; l++) { 5819 *bufJ++ = cols[l]; 5820 } 5821 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5822 } 5823 } 5824 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5825 } 5826 5827 /* recvs and sends of j-array are completed */ 5828 i = nrecvs; 5829 while (i--) { 5830 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5831 } 5832 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5833 } else if (scall == MAT_REUSE_MATRIX) { 5834 sstartsj = *startsj_s; 5835 rstartsj = *startsj_r; 5836 bufa = *bufa_ptr; 5837 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5838 b_otha = b_oth->a; 5839 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5840 5841 /* a-array */ 5842 /*---------*/ 5843 /* post receives of a-array */ 5844 for (i=0; i<nrecvs; i++) { 5845 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5846 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5847 } 5848 5849 /* pack the outgoing message a-array */ 5850 if (nsends) k = sstarts[0]; 5851 for (i=0; i<nsends; i++) { 5852 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5853 bufA = bufa+sstartsj[i]; 5854 for (j=0; j<nrows; j++) { 5855 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5856 for (ll=0; ll<sbs; ll++) { 5857 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5858 for (l=0; l<ncols; l++) { 5859 *bufA++ = vals[l]; 5860 } 5861 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5862 } 5863 } 5864 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5865 } 5866 /* recvs and sends of a-array are completed */ 5867 i = nrecvs; 5868 while (i--) { 5869 ierr = 
MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5870 } 5871 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5872 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5873 5874 if (scall == MAT_INITIAL_MATRIX) { 5875 /* put together the new matrix */ 5876 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5877 5878 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5879 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5880 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5881 b_oth->free_a = PETSC_TRUE; 5882 b_oth->free_ij = PETSC_TRUE; 5883 b_oth->nonew = 0; 5884 5885 ierr = PetscFree(bufj);CHKERRQ(ierr); 5886 if (!startsj_s || !bufa_ptr) { 5887 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5888 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5889 } else { 5890 *startsj_s = sstartsj; 5891 *startsj_r = rstartsj; 5892 *bufa_ptr = bufa; 5893 } 5894 } 5895 5896 ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5897 ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr); 5898 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5899 PetscFunctionReturn(0); 5900 } 5901 5902 /*@C 5903 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5904 5905 Not Collective 5906 5907 Input Parameters: 5908 . A - The matrix in mpiaij format 5909 5910 Output Parameter: 5911 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5912 . 
colmap - A map from global column index to local index into lvec 5913 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5914 5915 Level: developer 5916 5917 @*/ 5918 #if defined(PETSC_USE_CTABLE) 5919 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5920 #else 5921 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5922 #endif 5923 { 5924 Mat_MPIAIJ *a; 5925 5926 PetscFunctionBegin; 5927 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5928 PetscValidPointer(lvec, 2); 5929 PetscValidPointer(colmap, 3); 5930 PetscValidPointer(multScatter, 4); 5931 a = (Mat_MPIAIJ*) A->data; 5932 if (lvec) *lvec = a->lvec; 5933 if (colmap) *colmap = a->colmap; 5934 if (multScatter) *multScatter = a->Mvctx; 5935 PetscFunctionReturn(0); 5936 } 5937 5938 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5939 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5940 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*); 5941 #if defined(PETSC_HAVE_MKL_SPARSE) 5942 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5943 #endif 5944 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5945 #if defined(PETSC_HAVE_ELEMENTAL) 5946 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5947 #endif 5948 #if defined(PETSC_HAVE_HYPRE) 5949 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5950 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*); 5951 #endif 5952 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 5953 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5954 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*); 5955 
5956 /* 5957 Computes (B'*A')' since computing B*A directly is untenable 5958 5959 n p p 5960 ( ) ( ) ( ) 5961 m ( A ) * n ( B ) = m ( C ) 5962 ( ) ( ) ( ) 5963 5964 */ 5965 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5966 { 5967 PetscErrorCode ierr; 5968 Mat At,Bt,Ct; 5969 5970 PetscFunctionBegin; 5971 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5972 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5973 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5974 ierr = MatDestroy(&At);CHKERRQ(ierr); 5975 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5976 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5977 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5978 PetscFunctionReturn(0); 5979 } 5980 5981 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5982 { 5983 PetscErrorCode ierr; 5984 PetscInt m=A->rmap->n,n=B->cmap->n; 5985 Mat Cmat; 5986 5987 PetscFunctionBegin; 5988 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5989 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5990 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5991 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5992 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5993 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5994 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5995 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5996 5997 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5998 5999 *C = Cmat; 6000 PetscFunctionReturn(0); 6001 } 6002 6003 /* ----------------------------------------------------------------*/ 6004 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 6005 { 6006 PetscErrorCode ierr; 6007 6008 PetscFunctionBegin; 6009 if (scall == 
MAT_INITIAL_MATRIX) { 6010 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 6011 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 6012 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 6013 } 6014 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 6015 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 6016 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 6017 PetscFunctionReturn(0); 6018 } 6019 6020 /*MC 6021 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6022 6023 Options Database Keys: 6024 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 6025 6026 Level: beginner 6027 6028 Notes: 6029 MatSetValues() may be called for this matrix type with a NULL argument for the numerical values, 6030 in this case the values associated with the rows and columns one passes in are set to zero 6031 in the matrix 6032 6033 MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. 
In this no 6034 space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored 6035 6036 .seealso: MatCreateAIJ() 6037 M*/ 6038 6039 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6040 { 6041 Mat_MPIAIJ *b; 6042 PetscErrorCode ierr; 6043 PetscMPIInt size; 6044 6045 PetscFunctionBegin; 6046 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 6047 6048 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 6049 B->data = (void*)b; 6050 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 6051 B->assembled = PETSC_FALSE; 6052 B->insertmode = NOT_SET_VALUES; 6053 b->size = size; 6054 6055 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 6056 6057 /* build cache for off array entries formed */ 6058 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 6059 6060 b->donotstash = PETSC_FALSE; 6061 b->colmap = 0; 6062 b->garray = 0; 6063 b->roworiented = PETSC_TRUE; 6064 6065 /* stuff used for matrix vector multiply */ 6066 b->lvec = NULL; 6067 b->Mvctx = NULL; 6068 6069 /* stuff for MatGetRow() */ 6070 b->rowindices = 0; 6071 b->rowvalues = 0; 6072 b->getrowactive = PETSC_FALSE; 6073 6074 /* flexible pointer used in CUSP/CUSPARSE classes */ 6075 b->spptr = NULL; 6076 6077 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 6078 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 6079 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 6080 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 6081 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 6082 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr); 6083 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 6084 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 6085 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 6086 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr); 6087 #if defined(PETSC_HAVE_MKL_SPARSE) 6088 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr); 6089 #endif 6090 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 6091 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 6092 #if defined(PETSC_HAVE_ELEMENTAL) 6093 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 6094 #endif 6095 #if defined(PETSC_HAVE_HYPRE) 6096 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 6097 #endif 6098 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 6099 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr); 6100 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 6101 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 6102 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 6103 #if defined(PETSC_HAVE_HYPRE) 6104 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr); 6105 #endif 6106 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr); 6107 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 6108 PetscFunctionReturn(0); 6109 } 6110 6111 /*@C 6112 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 6113 and "off-diagonal" part of the matrix in CSR format. 6114 6115 Collective 6116 6117 Input Parameters: 6118 + comm - MPI communicator 6119 . m - number of local rows (Cannot be PETSC_DECIDE) 6120 . n - This value should be the same as the local size used in creating the 6121 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 6122 calculated if N is given) For square matrices n is almost always m. 6123 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 6124 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 6125 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6126 . j - column indices 6127 . a - matrix values 6128 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6129 . oj - column indices 6130 - oa - matrix values 6131 6132 Output Parameter: 6133 . mat - the matrix 6134 6135 Level: advanced 6136 6137 Notes: 6138 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 6139 must free the arrays once the matrix has been destroyed and not before. 

       The i and j indices are 0 based

       See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix

       This sets local rows and cannot be used to set off-processor values.

       Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
       legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
       not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
       the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
       keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
       communication if it is known that only local entries will be set.

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  /* wrap the caller's arrays (no copy): A holds the "diagonal" block (m x n), B the
     "off-diagonal" block with full global column width.
     NOTE(review): n is passed straight through here — if the caller gave PETSC_DECIDE for n,
     confirm the layouts set up above make this valid */
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* all entries are local by construction, so suppress off-process communication for
     this one assembly, then restore the default */
  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Special version for direct calls from Fortran
*/
#include <petsc/private/fortranimpl.h>

/* Change these macros so can be used in void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ3
#define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)

#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif
/* Fortran-callable fast path for MatSetValues on an MPIAIJ matrix: inlines the
   MatSetValues_SeqAIJ_{A,B}_Private macros to avoid per-call overhead.  Many of the
   local variables below (rp1, ap1, bm, nonew, inserted, ...) exist solely because
   those macros reference them by name — do not rename or remove them. */
PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;

#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
#endif
  {
    PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat        A                    = aij->A;
    Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa                  = a->a;
    PetscBool  ignorezeroentries    = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B                    = aij->B;
    Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba                  = b->a;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;  /* negative row indices are silently skipped */
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {
        /* locally owned row: set up binary-search state for both the diagonal (1) and
           off-diagonal (2) blocks, as required by the insertion macros */
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            /* column lands in the diagonal block */
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
            if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
          else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
#endif
          else {
            /* off-diagonal block: translate global column to local via colmap */
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined(PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                /* column not present in the assembled pattern: fall back to disassembled
                   (global-column) storage and refresh all macro state for B */
                ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
                ba    = b->a;
                inserted = PETSC_FALSE;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
            if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
          }
        }
      } else if (!aij->donotstash) {
        /* row owned by another process: stash for communication at assembly time */
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}