1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 2 #include <petsc/private/vecimpl.h> 3 #include <petsc/private/vecscatterimpl.h> 4 #include <petsc/private/isimpl.h> 5 #include <petscblaslapack.h> 6 #include <petscsf.h> 7 #include <petsc/private/hashmapi.h> 8 9 /*MC 10 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 11 12 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 13 and MATMPIAIJ otherwise. As a result, for single process communicators, 14 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation() is supported 15 for communicators controlling multiple processes. It is recommended that you call both of 16 the above preallocation routines for simplicity. 17 18 Options Database Keys: 19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 20 21 Developer Notes: 22 Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when 23 enough exist. 24 25 Level: beginner 26 27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ 28 M*/ 29 30 /*MC 31 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 32 33 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 34 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 35 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 36 for communicators controlling multiple processes. It is recommended that you call both of 37 the above preallocation routines for simplicity. 38 39 Options Database Keys: 40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 41 42 Level: beginner 43 44 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 45 M*/ 46 47 static PetscErrorCode MatPinToCPU_MPIAIJ(Mat A,PetscBool flg) 48 { 49 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 50 PetscErrorCode ierr; 51 52 PetscFunctionBegin; 53 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL) 54 A->pinnedtocpu = flg; 55 #endif 56 if (a->A) { 57 ierr = MatPinToCPU(a->A,flg);CHKERRQ(ierr); 58 } 59 if (a->B) { 60 ierr = MatPinToCPU(a->B,flg);CHKERRQ(ierr); 61 } 62 PetscFunctionReturn(0); 63 } 64 65 66 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 67 { 68 PetscErrorCode ierr; 69 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 70 71 PetscFunctionBegin; 72 if (mat->A) { 73 ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr); 74 ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr); 75 } 76 PetscFunctionReturn(0); 77 } 78 79 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 80 { 81 PetscErrorCode ierr; 82 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 83 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 84 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 85 const PetscInt *ia,*ib; 86 const MatScalar *aa,*bb; 87 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 88 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 89 90 PetscFunctionBegin; 91 *keptrows = 0; 92 ia = a->i; 93 ib = b->i; 94 for (i=0; i<m; i++) { 95 na = ia[i+1] - ia[i]; 96 nb = ib[i+1] - ib[i]; 97 if (!na && !nb) { 98 cnt++; 99 goto ok1; 100 } 101 aa = a->a + ia[i]; 102 for (j=0; j<na; j++) { 103 if (aa[j] != 0.0) goto ok1; 104 } 105 bb = b->a + ib[i]; 106 for (j=0; j <nb; j++) { 107 if (bb[j] != 0.0) goto ok1; 108 } 109 cnt++; 110 ok1:; 111 } 112 ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 113 if (!n0rows) PetscFunctionReturn(0); 114 ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr); 115 cnt = 0; 116 for (i=0; i<m; i++) { 117 na = ia[i+1] - ia[i]; 118 nb = ib[i+1] - ib[i]; 119 if (!na && !nb) continue; 120 aa = a->a + ia[i]; 121 for (j=0; j<na;j++) { 122 if (aa[j] != 0.0) { 123 rows[cnt++] = rstart + i; 124 goto ok2; 125 } 126 } 127 bb = b->a + ib[i]; 128 for (j=0; j<nb; j++) { 129 if (bb[j] != 0.0) { 130 rows[cnt++] = rstart + i; 131 goto ok2; 132 } 133 } 134 ok2:; 135 } 136 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 137 PetscFunctionReturn(0); 138 } 139 140 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 141 { 142 PetscErrorCode ierr; 143 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 144 PetscBool cong; 145 146 PetscFunctionBegin; 147 ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr); 148 if (Y->assembled && cong) { 149 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 150 } else { 151 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 152 } 153 PetscFunctionReturn(0); 154 } 155 156 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 157 { 158 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 159 PetscErrorCode ierr; 160 PetscInt i,rstart,nrows,*rows; 161 162 PetscFunctionBegin; 163 *zrows = NULL; 164 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 165 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 166 for (i=0; i<nrows; i++) rows[i] += rstart; 167 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 168 PetscFunctionReturn(0); 169 } 170 171 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 172 { 173 PetscErrorCode ierr; 174 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 175 PetscInt i,n,*garray = aij->garray; 176 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 177 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 178 PetscReal *work; 179 180 PetscFunctionBegin; 181 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 182 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 183 if (type == NORM_2) { 184 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 185 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 186 } 187 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 188 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 189 } 190 } else if (type == NORM_1) { 191 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 192 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 193 } 194 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 195 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 196 } 197 } else if (type == NORM_INFINITY) { 198 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 199 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 200 } 201 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 202 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 203 } 204 205 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 206 if (type == NORM_INFINITY) { 207 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 208 } else { 209 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 210 } 211 ierr = PetscFree(work);CHKERRQ(ierr); 212 if (type == NORM_2) { 213 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 214 } 215 PetscFunctionReturn(0); 216 } 217 218 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 219 { 220 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 221 IS sis,gis; 222 PetscErrorCode ierr; 223 const PetscInt *isis,*igis; 224 PetscInt n,*iis,nsis,ngis,rstart,i; 225 226 PetscFunctionBegin; 227 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 228 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 229 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 230 ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr); 231 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 232 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 233 234 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 235 ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr); 236 ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr); 237 n = ngis + nsis; 238 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 239 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 240 for (i=0; i<n; i++) iis[i] += rstart; 241 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 242 243 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 244 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 245 ierr = ISDestroy(&sis);CHKERRQ(ierr); 246 ierr = ISDestroy(&gis);CHKERRQ(ierr); 247 PetscFunctionReturn(0); 248 } 249 250 /* 251 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 252 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 253 254 Only for square matrices 255 256 Used by a preconditioner, hence PETSC_EXTERN 257 */ 258 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 259 { 260 PetscMPIInt rank,size; 261 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 262 PetscErrorCode ierr; 263 Mat mat; 264 Mat_SeqAIJ *gmata; 265 PetscMPIInt tag; 266 MPI_Status status; 267 PetscBool aij; 268 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 269 270 PetscFunctionBegin; 271 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 272 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 273 if (!rank) { 274 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 275 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 276 } 277 if (reuse == MAT_INITIAL_MATRIX) { 278 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 279 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 280 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 281 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 282 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 283 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 284 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 285 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 286 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 287 288 rowners[0] = 0; 289 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 290 rstart = rowners[rank]; 291 rend = rowners[rank+1]; 292 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 293 if (!rank) { 294 gmata = (Mat_SeqAIJ*) gmat->data; 295 /* send row lengths to all processors */ 296 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 297 for (i=1; i<size; i++) { 298 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 299 } 300 /* determine number diagonal and off-diagonal counts */ 301 ierr = PetscArrayzero(olens,m);CHKERRQ(ierr); 302 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 303 jj = 0; 304 for (i=0; i<m; i++) { 305 for (j=0; j<dlens[i]; j++) { 306 if (gmata->j[jj] < rstart) ld[i]++; 307 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 308 jj++; 309 } 310 } 311 /* send column indices to other processes */ 312 for (i=1; i<size; i++) { 313 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 314 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 315 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 316 } 317 318 /* send numerical values to other processes */ 319 for (i=1; i<size; i++) { 320 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 321 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 322 } 323 gmataa = gmata->a; 324 gmataj = gmata->j; 325 326 } else { 327 /* receive row lengths */ 328 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 329 /* receive column indices */ 330 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 331 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 332 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 333 /* determine number diagonal and off-diagonal counts */ 334 ierr = PetscArrayzero(olens,m);CHKERRQ(ierr); 335 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 336 jj = 0; 337 for (i=0; i<m; i++) { 338 for (j=0; j<dlens[i]; j++) { 339 if (gmataj[jj] < rstart) ld[i]++; 340 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 341 jj++; 342 } 343 } 344 /* receive numerical values */ 345 ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr); 346 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 347 } 348 /* set preallocation */ 349 for (i=0; i<m; i++) { 350 dlens[i] -= olens[i]; 351 } 352 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 353 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 354 355 for (i=0; i<m; i++) { 356 dlens[i] += olens[i]; 357 } 358 cnt = 0; 359 for (i=0; i<m; i++) { 360 row = rstart + i; 361 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 362 cnt += dlens[i]; 363 } 364 if (rank) { 365 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 366 } 367 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 368 ierr = PetscFree(rowners);CHKERRQ(ierr); 369 370 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 371 372 *inmat = mat; 373 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 374 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 375 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 376 mat = *inmat; 377 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 378 if (!rank) { 379 /* send numerical values to other processes */ 380 gmata = (Mat_SeqAIJ*) gmat->data; 381 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 382 gmataa = gmata->a; 383 for (i=1; i<size; i++) { 384 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 385 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 386 } 387 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 388 } else { 389 /* receive numerical values from process 0*/ 390 nz = Ad->nz + Ao->nz; 391 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 392 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 393 } 394 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 395 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 396 ad = Ad->a; 397 ao = Ao->a; 398 if (mat->rmap->n) { 399 i = 0; 400 nz = ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz; 401 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz; 402 } 403 for (i=1; i<mat->rmap->n; i++) { 404 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz; 405 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz; 406 } 407 i--; 408 if (mat->rmap->n) { 409 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); 410 } 411 if (rank) { 412 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 413 } 414 } 415 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 416 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 417 PetscFunctionReturn(0); 418 } 419 420 /* 421 Local utility routine that creates a mapping from the global column 422 number to the local number in the off-diagonal part of the local 423 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 424 a slightly higher hash table cost; without it it is not scalable (each processor 425 has an order N integer array but is fast to acess. 426 */ 427 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 428 { 429 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 430 PetscErrorCode ierr; 431 PetscInt n = aij->B->cmap->n,i; 432 433 PetscFunctionBegin; 434 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 435 #if defined(PETSC_USE_CTABLE) 436 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 437 for (i=0; i<n; i++) { 438 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 439 } 440 #else 441 ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 442 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 443 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 444 #endif 445 PetscFunctionReturn(0); 446 } 447 448 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 449 { \ 450 if (col <= lastcol1) low1 = 0; \ 451 else high1 = nrow1; \ 452 lastcol1 = col;\ 453 while (high1-low1 > 5) { \ 454 t = (low1+high1)/2; \ 455 if (rp1[t] > col) high1 = t; \ 456 else low1 = t; \ 457 } \ 458 for (_i=low1; _i<high1; _i++) { \ 459 if (rp1[_i] > col) break; \ 460 if (rp1[_i] == col) { \ 461 if (addv == ADD_VALUES) { \ 462 ap1[_i] += value; \ 463 /* Not sure LogFlops will slow dow the code or not */ \ 464 (void)PetscLogFlops(1.0); \ 465 } \ 466 else ap1[_i] = value; \ 467 goto a_noinsert; \ 468 } \ 469 } \ 470 if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 471 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 472 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 473 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 474 N = nrow1++ - 1; a->nz++; high1++; \ 475 /* shift up all the later entries in this row */ \ 476 ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\ 477 ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\ 478 rp1[_i] = col; \ 479 ap1[_i] = value; \ 480 A->nonzerostate++;\ 481 a_noinsert: ; \ 482 ailen[row] = nrow1; \ 483 } 484 485 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 486 { \ 487 if (col <= lastcol2) low2 = 0; \ 488 else high2 = nrow2; \ 489 lastcol2 = col; \ 490 while (high2-low2 > 5) { \ 491 t = (low2+high2)/2; \ 492 if (rp2[t] > col) high2 = t; \ 493 else low2 = t; \ 494 } \ 495 for (_i=low2; _i<high2; _i++) { \ 496 if (rp2[_i] > col) break; \ 497 if (rp2[_i] == col) { \ 498 if (addv == ADD_VALUES) { \ 499 ap2[_i] += value; \ 500 (void)PetscLogFlops(1.0); \ 501 } \ 502 else ap2[_i] = value; \ 503 goto b_noinsert; \ 504 } \ 505 } \ 506 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 507 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 508 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 509 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 510 N = nrow2++ - 1; b->nz++; high2++; \ 511 /* shift up all the later entries in this row */ \ 512 ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\ 513 ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\ 514 rp2[_i] = col; \ 515 ap2[_i] = value; \ 516 B->nonzerostate++; \ 517 b_noinsert: ; \ 518 bilen[row] = nrow2; \ 519 } 520 521 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 522 { 523 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 524 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 525 PetscErrorCode ierr; 526 PetscInt l,*garray = mat->garray,diag; 527 528 PetscFunctionBegin; 529 /* code only works for square matrices A */ 530 531 /* find size of row to the left of the diagonal part */ 532 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 533 row = row - diag; 534 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 535 if (garray[b->j[b->i[row]+l]] > diag) break; 536 } 537 ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr); 538 539 /* diagonal part */ 540 ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr); 541 542 /* right of diagonal part */ 543 ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr); 544 PetscFunctionReturn(0); 545 } 546 547 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 548 { 549 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 550 PetscScalar value = 0.0; 551 PetscErrorCode ierr; 552 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 553 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 554 PetscBool roworiented = aij->roworiented; 555 556 /* Some Variables required in the macro */ 557 Mat A = aij->A; 558 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 559 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 560 MatScalar *aa = a->a; 561 PetscBool ignorezeroentries = a->ignorezeroentries; 562 Mat B = aij->B; 563 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 564 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 565 MatScalar *ba = b->a; 566 567 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 568 PetscInt nonew; 569 MatScalar *ap1,*ap2; 570 571 PetscFunctionBegin; 572 for (i=0; i<m; i++) { 573 if (im[i] < 0) continue; 574 #if defined(PETSC_USE_DEBUG) 575 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 576 #endif 577 if (im[i] >= rstart && im[i] < rend) { 578 row = im[i] - rstart; 579 lastcol1 = -1; 580 rp1 = aj + ai[row]; 581 ap1 = aa + ai[row]; 582 rmax1 = aimax[row]; 583 nrow1 = ailen[row]; 584 low1 = 0; 585 high1 = nrow1; 586 lastcol2 = -1; 587 rp2 = bj + bi[row]; 588 ap2 = ba + bi[row]; 589 rmax2 = bimax[row]; 590 nrow2 = bilen[row]; 591 low2 = 0; 592 high2 = nrow2; 593 594 for (j=0; j<n; j++) { 595 if (v) value = roworiented ? v[i*n+j] : v[i+j*m]; 596 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 597 if (in[j] >= cstart && in[j] < cend) { 598 col = in[j] - cstart; 599 nonew = a->nonew; 600 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 601 } else if (in[j] < 0) continue; 602 #if defined(PETSC_USE_DEBUG) 603 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 604 #endif 605 else { 606 if (mat->was_assembled) { 607 if (!aij->colmap) { 608 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 609 } 610 #if defined(PETSC_USE_CTABLE) 611 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 612 col--; 613 #else 614 col = aij->colmap[in[j]] - 1; 615 #endif 616 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 617 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 618 col = in[j]; 619 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 620 B = aij->B; 621 b = (Mat_SeqAIJ*)B->data; 622 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 623 rp2 = bj + bi[row]; 624 ap2 = ba + bi[row]; 625 rmax2 = bimax[row]; 626 nrow2 = bilen[row]; 627 low2 = 0; 628 high2 = nrow2; 629 bm = aij->B->rmap->n; 630 ba = b->a; 631 } else if (col < 0) { 632 if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) { 633 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr); 634 } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 635 } 636 } else col = in[j]; 637 nonew = b->nonew; 638 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 639 } 640 } 641 } else { 642 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 643 if (!aij->donotstash) { 644 mat->assembled = PETSC_FALSE; 645 if (roworiented) { 646 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 647 } else { 648 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 649 } 650 } 651 } 652 } 653 PetscFunctionReturn(0); 654 } 655 656 /* 657 This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 658 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 659 No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. 660 */ 661 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[]) 662 { 663 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 664 Mat A = aij->A; /* diagonal part of the matrix */ 665 Mat B = aij->B; /* offdiagonal part of the matrix */ 666 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 667 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 668 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,col; 669 PetscInt *ailen = a->ilen,*aj = a->j; 670 PetscInt *bilen = b->ilen,*bj = b->j; 671 PetscInt am = aij->A->rmap->n,j; 672 PetscInt diag_so_far = 0,dnz; 673 PetscInt offd_so_far = 0,onz; 674 675 PetscFunctionBegin; 676 /* Iterate over all rows of the matrix */ 677 for (j=0; j<am; j++) { 678 dnz = onz = 0; 679 /* Iterate over all non-zero columns of the current row */ 680 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 681 /* If column is in the diagonal */ 682 if (mat_j[col] >= cstart && mat_j[col] < cend) { 683 aj[diag_so_far++] = mat_j[col] - cstart; 684 dnz++; 685 } else { /* off-diagonal entries */ 686 bj[offd_so_far++] = mat_j[col]; 687 onz++; 688 } 689 } 690 ailen[j] = dnz; 691 bilen[j] = onz; 692 } 693 PetscFunctionReturn(0); 694 } 695 696 /* 697 This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 698 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 699 No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ. 700 Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 701 would not be true and the more complex MatSetValues_MPIAIJ has to be used. 702 */ 703 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[]) 704 { 705 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 706 Mat A = aij->A; /* diagonal part of the matrix */ 707 Mat B = aij->B; /* offdiagonal part of the matrix */ 708 Mat_SeqAIJ *aijd =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data; 709 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 710 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 711 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend; 712 PetscInt *ailen = a->ilen,*aj = a->j; 713 PetscInt *bilen = b->ilen,*bj = b->j; 714 PetscInt am = aij->A->rmap->n,j; 715 PetscInt *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */ 716 PetscInt col,dnz_row,onz_row,rowstart_diag,rowstart_offd; 717 PetscScalar *aa = a->a,*ba = b->a; 718 719 PetscFunctionBegin; 720 /* Iterate over all rows of the matrix */ 721 for (j=0; j<am; j++) { 722 dnz_row = onz_row = 0; 723 rowstart_offd = full_offd_i[j]; 724 rowstart_diag = full_diag_i[j]; 725 /* Iterate over all non-zero columns of the current row */ 726 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 727 /* If column is in the diagonal */ 728 if (mat_j[col] >= cstart && mat_j[col] < cend) { 729 aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 730 aa[rowstart_diag+dnz_row] = mat_a[col]; 731 dnz_row++; 732 } else { /* off-diagonal entries */ 733 bj[rowstart_offd+onz_row] = mat_j[col]; 734 ba[rowstart_offd+onz_row] = mat_a[col]; 735 onz_row++; 736 } 737 } 738 ailen[j] = dnz_row; 739 bilen[j] = onz_row; 740 } 741 PetscFunctionReturn(0); 742 } 743 744 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 745 { 746 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 747 PetscErrorCode ierr; 748 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 749 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 750 751 PetscFunctionBegin; 752 for (i=0; i<m; i++) { 753 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 754 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 755 if (idxm[i] >= rstart && idxm[i] < rend) { 756 row = idxm[i] - rstart; 757 for (j=0; j<n; j++) { 758 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 759 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 760 if (idxn[j] >= cstart && idxn[j] < cend) { 761 col = idxn[j] - cstart; 762 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 763 } else { 764 if (!aij->colmap) { 765 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 766 } 767 #if defined(PETSC_USE_CTABLE) 768 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 769 col--; 770 #else 771 col = aij->colmap[idxn[j]] - 1; 772 #endif 773 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 774 else { 775 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 776 } 777 } 778 } 779 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 780 } 781 PetscFunctionReturn(0); 782 } 783 784 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 785 786 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 787 { 788 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 789 PetscErrorCode ierr; 790 PetscInt nstash,reallocs; 791 792 PetscFunctionBegin; 793 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 794 795 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 796 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 797 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 798 PetscFunctionReturn(0); 799 } 800 801 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 802 { 803 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 804 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 805 PetscErrorCode ierr; 806 PetscMPIInt n; 807 PetscInt i,j,rstart,ncols,flg; 808 PetscInt *row,*col; 809 PetscBool other_disassembled; 810 PetscScalar *val; 811 812 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 813 814 PetscFunctionBegin; 815 if (!aij->donotstash && !mat->nooffprocentries) { 816 while (1) { 817 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 818 if (!flg) break; 819 820 for (i=0; i<n; ) { 821 /* Now identify the consecutive vals belonging to the same row */ 822 for (j=i,rstart=row[j]; j<n; j++) { 823 if (row[j] != rstart) break; 824 } 825 if (j < n) ncols = j-i; 826 else ncols = n-i; 827 /* Now assemble all these values with a single function call */ 828 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 829 830 i = j; 831 } 832 } 833 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 834 } 835 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 836 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 837 #endif 838 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 839 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 840 841 /* determine if any processor has disassembled, if so we must 842 also disassemble ourself, in order that we may reassemble. */ 843 /* 844 if nonzero structure of submatrix B cannot change then we know that 845 no processor disassembled thus we can skip this stuff 846 */ 847 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 848 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 849 if (mat->was_assembled && !other_disassembled) { 850 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 851 aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */ 852 #endif 853 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 854 } 855 } 856 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 857 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 858 } 859 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 860 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 861 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 862 #endif 863 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 864 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 865 866 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 867 868 aij->rowvalues = 0; 869 870 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 871 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 872 873 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 874 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 875 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 876 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 877 } 878 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 879 mat->offloadmask = PETSC_OFFLOAD_BOTH; 880 #endif 881 PetscFunctionReturn(0); 882 } 883 884 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 885 { 886 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 887 PetscErrorCode ierr; 888 889 PetscFunctionBegin; 890 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 891 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 892 PetscFunctionReturn(0); 893 } 894 895 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 896 { 897 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 898 PetscObjectState sA, sB; 899 PetscInt *lrows; 900 PetscInt r, len; 901 PetscBool cong, lch, gch; 902 PetscErrorCode ierr; 903 904 PetscFunctionBegin; 905 /* get locally owned rows */ 906 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 907 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 908 /* fix right hand side if needed */ 909 if (x && b) { 910 const PetscScalar *xx; 911 PetscScalar *bb; 912 913 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 914 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 915 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 916 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 917 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 918 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 919 } 920 921 sA = mat->A->nonzerostate; 922 sB = mat->B->nonzerostate; 923 924 if (diag != 0.0 && cong) { 925 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 926 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 927 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 928 Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data; 929 Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data; 930 PetscInt nnwA, nnwB; 931 PetscBool nnzA, nnzB; 932 933 nnwA = aijA->nonew; 934 nnwB = aijB->nonew; 935 nnzA = aijA->keepnonzeropattern; 936 nnzB = aijB->keepnonzeropattern; 937 if (!nnzA) { 938 ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr); 939 aijA->nonew = 0; 940 } 941 if (!nnzB) { 942 ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr); 943 aijB->nonew = 0; 944 } 945 /* Must zero here before the next loop */ 946 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 947 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 948 for (r = 0; r < len; ++r) { 949 const PetscInt row = lrows[r] + A->rmap->rstart; 950 if (row >= A->cmap->N) continue; 951 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 952 } 953 aijA->nonew = nnwA; 954 aijB->nonew = nnwB; 955 } else { 956 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 957 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 958 } 959 ierr = PetscFree(lrows);CHKERRQ(ierr); 960 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 961 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 962 963 /* reduce nonzerostate */ 964 lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate); 965 ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 966 if (gch) A->nonzerostate++; 967 PetscFunctionReturn(0); 968 } 969 970 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 971 { 972 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 973 PetscErrorCode ierr; 974 PetscMPIInt n = A->rmap->n; 975 PetscInt i,j,r,m,len = 0; 976 PetscInt *lrows,*owners = A->rmap->range; 977 PetscMPIInt p = 0; 978 PetscSFNode *rrows; 979 PetscSF sf; 980 const PetscScalar *xx; 981 PetscScalar *bb,*mask; 982 Vec xmask,lmask; 983 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 984 const PetscInt *aj, *ii,*ridx; 985 PetscScalar *aa; 986 987 PetscFunctionBegin; 988 /* Create SF where leaves are input rows and roots are owned rows */ 989 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 990 for (r = 0; r < n; ++r) lrows[r] = -1; 991 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 992 for (r = 0; r < N; ++r) { 993 const PetscInt idx = rows[r]; 994 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 995 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 996 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 997 } 998 rrows[r].rank = p; 999 rrows[r].index = rows[r] - owners[p]; 1000 } 1001 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 1002 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 1003 /* Collect flags for rows to be zeroed */ 1004 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1005 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1006 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1007 /* Compress and put in row numbers */ 1008 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 1009 /* zero diagonal part of matrix */ 1010 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 1011 /* handle off diagonal part of matrix */ 1012 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 1013 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1014 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1015 for (i=0; i<len; i++) bb[lrows[i]] = 1; 1016 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1017 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1018 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1019 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1020 if (x && b) { /* this code is buggy when the row and column layout don't match */ 1021 PetscBool cong; 1022 1023 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 1024 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 1025 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1026 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1027 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1028 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1029 } 1030 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1031 /* remove zeroed rows of off diagonal matrix */ 1032 ii = aij->i; 1033 for (i=0; i<len; i++) { 1034 ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr); 1035 } 1036 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1037 if (aij->compressedrow.use) { 1038 m = aij->compressedrow.nrows; 1039 ii = aij->compressedrow.i; 1040 ridx = aij->compressedrow.rindex; 1041 for (i=0; i<m; i++) { 1042 n = ii[i+1] - ii[i]; 1043 aj = aij->j + ii[i]; 1044 aa = aij->a + ii[i]; 1045 1046 for (j=0; j<n; j++) { 1047 if (PetscAbsScalar(mask[*aj])) { 1048 if (b) bb[*ridx] -= *aa*xx[*aj]; 1049 *aa = 0.0; 1050 } 1051 aa++; 1052 aj++; 1053 } 1054 ridx++; 1055 } 1056 } else { /* do not use compressed row format */ 1057 m = l->B->rmap->n; 1058 for (i=0; i<m; i++) { 1059 n = ii[i+1] - ii[i]; 1060 aj = aij->j + ii[i]; 1061 aa = aij->a + ii[i]; 1062 for (j=0; j<n; j++) { 1063 if (PetscAbsScalar(mask[*aj])) { 1064 if (b) bb[i] -= *aa*xx[*aj]; 1065 *aa = 0.0; 1066 } 1067 aa++; 1068 aj++; 1069 } 1070 } 1071 } 1072 if (x && b) { 1073 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1074 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1075 } 1076 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1077 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1078 ierr = PetscFree(lrows);CHKERRQ(ierr); 1079 1080 /* only change matrix nonzero state if pattern was allowed to be changed */ 1081 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 1082 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1083 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1084 } 1085 PetscFunctionReturn(0); 1086 } 1087 1088 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1089 { 1090 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1091 PetscErrorCode ierr; 1092 PetscInt nt; 1093 VecScatter Mvctx = a->Mvctx; 1094 1095 PetscFunctionBegin; 1096 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1097 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1098 1099 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1100 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1101 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1102 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1103 PetscFunctionReturn(0); 1104 } 1105 1106 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1107 { 1108 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1109 PetscErrorCode ierr; 1110 1111 PetscFunctionBegin; 1112 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1113 PetscFunctionReturn(0); 1114 } 1115 1116 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1117 { 1118 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1119 PetscErrorCode ierr; 1120 VecScatter Mvctx = a->Mvctx; 1121 1122 PetscFunctionBegin; 1123 if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1; 1124 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1125 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1126 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1127 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1128 PetscFunctionReturn(0); 1129 } 1130 1131 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1132 { 1133 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1134 PetscErrorCode ierr; 1135 1136 PetscFunctionBegin; 1137 /* do nondiagonal part */ 1138 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1139 /* do local part */ 1140 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1141 /* add partial results together */ 1142 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1143 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1144 PetscFunctionReturn(0); 1145 } 1146 1147 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1148 { 1149 MPI_Comm comm; 1150 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1151 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1152 IS Me,Notme; 1153 PetscErrorCode ierr; 1154 PetscInt M,N,first,last,*notme,i; 1155 PetscBool lf; 1156 PetscMPIInt size; 1157 1158 PetscFunctionBegin; 1159 /* Easy test: symmetric diagonal block */ 1160 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1161 ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr); 1162 ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr); 1163 if (!*f) PetscFunctionReturn(0); 1164 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1165 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1166 if (size == 1) PetscFunctionReturn(0); 1167 1168 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1169 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1170 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1171 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1172 for (i=0; i<first; i++) notme[i] = i; 1173 for (i=last; i<M; i++) notme[i-last+first] = i; 1174 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1175 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1176 ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1177 Aoff = Aoffs[0]; 1178 ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1179 Boff = Boffs[0]; 1180 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1181 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1182 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1183 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1184 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1185 ierr = PetscFree(notme);CHKERRQ(ierr); 1186 PetscFunctionReturn(0); 1187 } 1188 1189 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1190 { 1191 PetscErrorCode ierr; 1192 1193 PetscFunctionBegin; 1194 ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr); 1195 PetscFunctionReturn(0); 1196 } 1197 1198 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1199 { 1200 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1201 PetscErrorCode ierr; 1202 1203 PetscFunctionBegin; 1204 /* do nondiagonal part */ 1205 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1206 /* do local part */ 1207 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1208 /* add partial results together */ 1209 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1210 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1211 PetscFunctionReturn(0); 1212 } 1213 1214 /* 1215 This only works correctly for square matrices where the subblock A->A is the 1216 diagonal block 1217 */ 1218 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1219 { 1220 PetscErrorCode ierr; 1221 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1222 1223 PetscFunctionBegin; 1224 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1225 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1226 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1227 PetscFunctionReturn(0); 1228 } 1229 1230 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1231 { 1232 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1233 PetscErrorCode ierr; 1234 1235 PetscFunctionBegin; 1236 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1237 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1238 PetscFunctionReturn(0); 1239 } 1240 1241 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1242 { 1243 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1244 PetscErrorCode ierr; 1245 1246 PetscFunctionBegin; 1247 #if defined(PETSC_USE_LOG) 1248 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1249 #endif 1250 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1251 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1252 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1253 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1254 #if defined(PETSC_USE_CTABLE) 1255 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1256 #else 1257 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1258 #endif 1259 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1260 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1261 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1262 if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);} 1263 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1264 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1265 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1266 1267 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1268 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1269 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1270 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1271 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1272 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr); 1273 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1274 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1275 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr); 1276 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1277 #if defined(PETSC_HAVE_ELEMENTAL) 1278 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1279 #endif 1280 #if defined(PETSC_HAVE_HYPRE) 1281 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1282 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1283 #endif 1284 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1285 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr); 1286 PetscFunctionReturn(0); 1287 } 1288 1289 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1290 { 1291 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1292 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1293 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1294 PetscErrorCode ierr; 1295 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1296 int fd; 1297 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1298 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1299 PetscScalar *column_values; 1300 PetscInt message_count,flowcontrolcount; 1301 FILE *file; 1302 1303 PetscFunctionBegin; 1304 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1305 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1306 nz = A->nz + B->nz; 1307 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1308 if (!rank) { 1309 header[0] = MAT_FILE_CLASSID; 1310 header[1] = mat->rmap->N; 1311 header[2] = mat->cmap->N; 1312 1313 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1314 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1315 /* get largest number of rows any processor has */ 1316 rlen = mat->rmap->n; 1317 range = mat->rmap->range; 1318 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1319 } else { 1320 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1321 rlen = mat->rmap->n; 1322 } 1323 1324 /* load up the local row counts */ 1325 ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr); 1326 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1327 1328 /* store the row lengths to the file */ 1329 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1330 if (!rank) { 1331 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1332 for (i=1; i<size; i++) { 1333 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1334 rlen = range[i+1] - range[i]; 1335 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1336 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1337 } 1338 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1339 } else { 1340 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1341 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1342 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1343 } 1344 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1345 1346 /* load up the local column indices */ 1347 nzmax = nz; /* th processor needs space a largest processor needs */ 1348 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1349 ierr = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr); 1350 cnt = 0; 1351 for (i=0; i<mat->rmap->n; i++) { 1352 for (j=B->i[i]; j<B->i[i+1]; j++) { 1353 if ((col = garray[B->j[j]]) > cstart) break; 1354 column_indices[cnt++] = col; 1355 } 1356 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1357 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1358 } 1359 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1360 1361 /* store the column indices to the file */ 1362 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1363 if (!rank) { 1364 MPI_Status status; 1365 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1366 for (i=1; i<size; i++) { 1367 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1368 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1369 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1370 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1371 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1372 } 1373 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1374 } else { 1375 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1376 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1377 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1378 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1379 } 1380 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1381 1382 /* load up the local column values */ 1383 ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr); 1384 cnt = 0; 1385 for (i=0; i<mat->rmap->n; i++) { 1386 for (j=B->i[i]; j<B->i[i+1]; j++) { 1387 if (garray[B->j[j]] > cstart) break; 1388 column_values[cnt++] = B->a[j]; 1389 } 1390 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1391 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1392 } 1393 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1394 1395 /* store the column values to the file */ 1396 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1397 if (!rank) { 1398 MPI_Status status; 1399 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1400 for (i=1; i<size; i++) { 1401 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1402 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1403 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1404 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1405 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1406 } 1407 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1408 } else { 1409 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1410 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1411 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1412 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1413 } 1414 ierr = PetscFree(column_values);CHKERRQ(ierr); 1415 1416 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1417 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1418 PetscFunctionReturn(0); 1419 } 1420 1421 #include <petscdraw.h> 1422 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1423 { 1424 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1425 PetscErrorCode ierr; 1426 PetscMPIInt rank = aij->rank,size = aij->size; 1427 PetscBool isdraw,iascii,isbinary; 1428 PetscViewer sviewer; 1429 PetscViewerFormat format; 1430 1431 PetscFunctionBegin; 1432 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1433 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1434 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1435 if (iascii) { 1436 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1437 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1438 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1439 ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr); 1440 ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1441 for (i=0; i<(PetscInt)size; i++) { 1442 nmax = PetscMax(nmax,nz[i]); 1443 nmin = PetscMin(nmin,nz[i]); 1444 navg += nz[i]; 1445 } 1446 ierr = PetscFree(nz);CHKERRQ(ierr); 1447 navg = navg/size; 1448 ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr); 1449 PetscFunctionReturn(0); 1450 } 1451 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1452 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1453 MatInfo info; 1454 PetscBool inodes; 1455 1456 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1457 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1458 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1459 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1460 if (!inodes) { 1461 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n", 1462 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1463 } else { 1464 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n", 1465 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1466 } 1467 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1468 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1469 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1470 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1471 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1472 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1473 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1474 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1475 PetscFunctionReturn(0); 1476 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1477 PetscInt inodecount,inodelimit,*inodes; 1478 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1479 if (inodes) { 1480 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1481 } else { 1482 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1483 } 1484 PetscFunctionReturn(0); 1485 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1486 PetscFunctionReturn(0); 1487 } 1488 } else if (isbinary) { 1489 if (size == 1) { 1490 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1491 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1492 } else { 1493 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1494 } 1495 PetscFunctionReturn(0); 1496 } else if (iascii && size == 1) { 1497 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1498 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1499 PetscFunctionReturn(0); 1500 } else if (isdraw) { 1501 PetscDraw draw; 1502 PetscBool isnull; 1503 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1504 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1505 if (isnull) PetscFunctionReturn(0); 1506 } 1507 1508 { /* assemble the entire matrix onto first processor */ 1509 Mat A = NULL, Av; 1510 IS isrow,iscol; 1511 1512 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1513 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1514 ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 1515 ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr); 1516 /* The commented code uses MatCreateSubMatrices instead */ 1517 /* 1518 Mat *AA, A = NULL, Av; 1519 IS isrow,iscol; 1520 1521 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1522 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1523 ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr); 1524 if (!rank) { 1525 ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr); 1526 A = AA[0]; 1527 Av = AA[0]; 1528 } 1529 ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr); 1530 */ 1531 ierr = ISDestroy(&iscol);CHKERRQ(ierr); 1532 ierr = ISDestroy(&isrow);CHKERRQ(ierr); 1533 /* 1534 Everyone has to call to draw the matrix since the graphics waits are 1535 synchronized across all processors that share the PetscDraw object 1536 */ 1537 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1538 if (!rank) { 1539 if (((PetscObject)mat)->name) { 1540 ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr); 1541 } 1542 ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr); 1543 } 1544 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1545 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1546 ierr = MatDestroy(&A);CHKERRQ(ierr); 1547 } 1548 PetscFunctionReturn(0); 1549 } 1550 1551 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1552 { 1553 PetscErrorCode ierr; 1554 PetscBool iascii,isdraw,issocket,isbinary; 1555 1556 PetscFunctionBegin; 1557 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1558 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1559 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1560 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1561 if (iascii || isdraw || isbinary || issocket) { 1562 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1563 } 1564 PetscFunctionReturn(0); 1565 } 1566 1567 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1568 { 1569 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1570 PetscErrorCode ierr; 1571 Vec bb1 = 0; 1572 PetscBool hasop; 1573 1574 PetscFunctionBegin; 1575 if (flag == SOR_APPLY_UPPER) { 1576 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1577 PetscFunctionReturn(0); 1578 } 1579 1580 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1581 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1582 } 1583 1584 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1585 if (flag & SOR_ZERO_INITIAL_GUESS) { 1586 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1587 its--; 1588 } 1589 1590 while (its--) { 1591 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1592 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1593 1594 /* update rhs: bb1 = bb - B*x */ 1595 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1596 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1597 1598 /* local sweep */ 1599 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1600 } 1601 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1602 if (flag & SOR_ZERO_INITIAL_GUESS) { 1603 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1604 its--; 1605 } 1606 while (its--) { 1607 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1608 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1609 1610 /* update rhs: bb1 = bb - B*x */ 1611 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1612 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1613 1614 /* local sweep */ 1615 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1616 } 1617 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1618 if (flag & SOR_ZERO_INITIAL_GUESS) { 1619 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1620 its--; 1621 } 1622 while (its--) { 1623 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1624 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1625 1626 /* update rhs: bb1 = bb - B*x */ 1627 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1628 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1629 1630 /* local sweep */ 1631 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1632 } 1633 } else if (flag & SOR_EISENSTAT) { 1634 Vec xx1; 1635 1636 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1637 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1638 1639 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1640 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1641 if (!mat->diag) { 1642 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1643 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1644 } 1645 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1646 if (hasop) { 1647 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1648 } else { 1649 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1650 } 1651 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1652 1653 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1654 1655 /* local sweep */ 1656 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1657 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1658 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1659 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1660 1661 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1662 1663 matin->factorerrortype = mat->A->factorerrortype; 1664 PetscFunctionReturn(0); 1665 } 1666 1667 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1668 { 1669 Mat aA,aB,Aperm; 1670 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1671 PetscScalar *aa,*ba; 1672 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1673 PetscSF rowsf,sf; 1674 IS parcolp = NULL; 1675 PetscBool done; 1676 PetscErrorCode ierr; 1677 1678 PetscFunctionBegin; 1679 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1680 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1681 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1682 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1683 1684 /* Invert row permutation to find out where my rows should go */ 1685 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1686 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1687 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1688 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1689 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1690 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1691 1692 /* Invert column permutation to find out where my columns should go */ 1693 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1694 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1695 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1696 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1697 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1698 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1699 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1700 1701 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1702 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1703 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1704 1705 /* Find out where my gcols should go */ 1706 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1707 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1708 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1709 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1710 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1711 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1712 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1713 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1714 1715 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1716 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1717 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1718 for (i=0; i<m; i++) { 1719 PetscInt row = rdest[i]; 1720 PetscMPIInt rowner; 1721 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1722 for (j=ai[i]; j<ai[i+1]; j++) { 1723 PetscInt col = cdest[aj[j]]; 1724 PetscMPIInt cowner; 1725 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1726 if (rowner == cowner) dnnz[i]++; 1727 else onnz[i]++; 1728 } 1729 for (j=bi[i]; j<bi[i+1]; j++) { 1730 PetscInt col = gcdest[bj[j]]; 1731 PetscMPIInt cowner; 1732 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1733 if (rowner == cowner) dnnz[i]++; 1734 else onnz[i]++; 1735 } 1736 } 1737 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1738 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1739 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1740 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1741 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1742 1743 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1744 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1745 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1746 for (i=0; i<m; i++) { 1747 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1748 PetscInt j0,rowlen; 1749 rowlen = ai[i+1] - ai[i]; 1750 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1751 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1752 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1753 } 1754 rowlen = bi[i+1] - bi[i]; 1755 for (j0=j=0; j<rowlen; j0=j) { 1756 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1757 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1758 } 1759 } 1760 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1761 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1762 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1763 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1764 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1765 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1766 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1767 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1768 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1769 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1770 *B = Aperm; 1771 PetscFunctionReturn(0); 1772 } 1773 1774 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1775 { 1776 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1777 PetscErrorCode ierr; 1778 1779 PetscFunctionBegin; 1780 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1781 if (ghosts) *ghosts = aij->garray; 1782 PetscFunctionReturn(0); 1783 } 1784 1785 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1786 { 1787 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1788 Mat A = mat->A,B = mat->B; 1789 PetscErrorCode ierr; 1790 PetscLogDouble isend[5],irecv[5]; 1791 1792 PetscFunctionBegin; 1793 info->block_size = 1.0; 1794 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1795 1796 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1797 isend[3] = info->memory; isend[4] = info->mallocs; 1798 1799 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1800 1801 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1802 isend[3] += info->memory; isend[4] += info->mallocs; 1803 if (flag == MAT_LOCAL) { 1804 info->nz_used = isend[0]; 1805 info->nz_allocated = isend[1]; 1806 info->nz_unneeded = isend[2]; 1807 info->memory = isend[3]; 1808 info->mallocs = isend[4]; 1809 } else if (flag == MAT_GLOBAL_MAX) { 1810 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1811 1812 info->nz_used = irecv[0]; 1813 info->nz_allocated = irecv[1]; 1814 info->nz_unneeded = irecv[2]; 1815 info->memory = irecv[3]; 1816 info->mallocs = irecv[4]; 1817 } else if (flag == MAT_GLOBAL_SUM) { 1818 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1819 1820 info->nz_used = irecv[0]; 1821 info->nz_allocated = irecv[1]; 1822 info->nz_unneeded = irecv[2]; 1823 info->memory = irecv[3]; 1824 info->mallocs = irecv[4]; 1825 } 1826 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1827 info->fill_ratio_needed = 0; 1828 info->factor_mallocs = 0; 1829 PetscFunctionReturn(0); 1830 } 1831 1832 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1833 { 1834 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1835 PetscErrorCode ierr; 1836 1837 PetscFunctionBegin; 1838 switch (op) { 1839 case MAT_NEW_NONZERO_LOCATIONS: 1840 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1841 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1842 case MAT_KEEP_NONZERO_PATTERN: 1843 case MAT_NEW_NONZERO_LOCATION_ERR: 1844 case MAT_USE_INODES: 1845 case MAT_IGNORE_ZERO_ENTRIES: 1846 MatCheckPreallocated(A,1); 1847 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1848 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1849 break; 1850 case MAT_ROW_ORIENTED: 1851 MatCheckPreallocated(A,1); 1852 a->roworiented = flg; 1853 1854 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1855 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1856 break; 1857 case MAT_NEW_DIAGONALS: 1858 case MAT_SORTED_FULL: 1859 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1860 break; 1861 case MAT_IGNORE_OFF_PROC_ENTRIES: 1862 a->donotstash = flg; 1863 break; 1864 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1865 case MAT_SPD: 1866 case MAT_SYMMETRIC: 1867 case MAT_STRUCTURALLY_SYMMETRIC: 1868 case MAT_HERMITIAN: 1869 case MAT_SYMMETRY_ETERNAL: 1870 break; 1871 case MAT_SUBMAT_SINGLEIS: 1872 A->submat_singleis = flg; 1873 break; 1874 case MAT_STRUCTURE_ONLY: 1875 /* The option is handled directly by MatSetOption() */ 1876 break; 1877 default: 1878 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1879 } 1880 PetscFunctionReturn(0); 1881 } 1882 1883 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1884 { 1885 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1886 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1887 PetscErrorCode ierr; 1888 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1889 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1890 PetscInt *cmap,*idx_p; 1891 1892 PetscFunctionBegin; 1893 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1894 mat->getrowactive = PETSC_TRUE; 1895 1896 if (!mat->rowvalues && (idx || v)) { 1897 /* 1898 allocate enough space to hold information from the longest row. 1899 */ 1900 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1901 PetscInt max = 1,tmp; 1902 for (i=0; i<matin->rmap->n; i++) { 1903 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1904 if (max < tmp) max = tmp; 1905 } 1906 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1907 } 1908 1909 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1910 lrow = row - rstart; 1911 1912 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1913 if (!v) {pvA = 0; pvB = 0;} 1914 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1915 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1916 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1917 nztot = nzA + nzB; 1918 1919 cmap = mat->garray; 1920 if (v || idx) { 1921 if (nztot) { 1922 /* Sort by increasing column numbers, assuming A and B already sorted */ 1923 PetscInt imark = -1; 1924 if (v) { 1925 *v = v_p = mat->rowvalues; 1926 for (i=0; i<nzB; i++) { 1927 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1928 else break; 1929 } 1930 imark = i; 1931 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1932 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1933 } 1934 if (idx) { 1935 *idx = idx_p = mat->rowindices; 1936 if (imark > -1) { 1937 for (i=0; i<imark; i++) { 1938 idx_p[i] = cmap[cworkB[i]]; 1939 } 1940 } else { 1941 for (i=0; i<nzB; i++) { 1942 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1943 else break; 1944 } 1945 imark = i; 1946 } 1947 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1948 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1949 } 1950 } else { 1951 if (idx) *idx = 0; 1952 if (v) *v = 0; 1953 } 1954 } 1955 *nz = nztot; 1956 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1957 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1958 PetscFunctionReturn(0); 1959 } 1960 1961 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1962 { 1963 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1964 1965 PetscFunctionBegin; 1966 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1967 aij->getrowactive = PETSC_FALSE; 1968 PetscFunctionReturn(0); 1969 } 1970 1971 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1972 { 1973 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1974 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1975 PetscErrorCode ierr; 1976 PetscInt i,j,cstart = mat->cmap->rstart; 1977 PetscReal sum = 0.0; 1978 MatScalar *v; 1979 1980 PetscFunctionBegin; 1981 if (aij->size == 1) { 1982 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1983 } else { 1984 if (type == NORM_FROBENIUS) { 1985 v = amat->a; 1986 for (i=0; i<amat->nz; i++) { 1987 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1988 } 1989 v = bmat->a; 1990 for (i=0; i<bmat->nz; i++) { 1991 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1992 } 1993 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1994 *norm = PetscSqrtReal(*norm); 1995 ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr); 1996 } else if (type == NORM_1) { /* max column norm */ 1997 PetscReal *tmp,*tmp2; 1998 PetscInt *jj,*garray = aij->garray; 1999 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 2000 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 2001 *norm = 0.0; 2002 v = amat->a; jj = amat->j; 2003 for (j=0; j<amat->nz; j++) { 2004 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 2005 } 2006 v = bmat->a; jj = bmat->j; 2007 for (j=0; j<bmat->nz; j++) { 2008 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 2009 } 2010 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2011 for (j=0; j<mat->cmap->N; j++) { 2012 if (tmp2[j] > *norm) *norm = tmp2[j]; 2013 } 2014 ierr = PetscFree(tmp);CHKERRQ(ierr); 2015 ierr = PetscFree(tmp2);CHKERRQ(ierr); 2016 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 2017 } else if (type == NORM_INFINITY) { /* max row norm */ 2018 PetscReal ntemp = 0.0; 2019 for (j=0; j<aij->A->rmap->n; j++) { 2020 v = amat->a + amat->i[j]; 2021 sum = 0.0; 2022 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2023 sum += PetscAbsScalar(*v); v++; 2024 } 2025 v = bmat->a + bmat->i[j]; 2026 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2027 sum += PetscAbsScalar(*v); v++; 2028 } 2029 if (sum > ntemp) ntemp = sum; 2030 } 2031 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2032 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 2033 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 2034 } 2035 PetscFunctionReturn(0); 2036 } 2037 2038 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2039 { 2040 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 2041 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 2042 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol; 2043 const PetscInt *ai,*aj,*bi,*bj,*B_diag_i; 2044 PetscErrorCode ierr; 2045 Mat B,A_diag,*B_diag; 2046 const MatScalar *array; 2047 2048 PetscFunctionBegin; 2049 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2050 ai = Aloc->i; aj = Aloc->j; 2051 bi = Bloc->i; bj = Bloc->j; 2052 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2053 PetscInt *d_nnz,*g_nnz,*o_nnz; 2054 PetscSFNode *oloc; 2055 PETSC_UNUSED PetscSF sf; 2056 2057 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 2058 /* compute d_nnz for preallocation */ 2059 ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr); 2060 for (i=0; i<ai[ma]; i++) { 2061 d_nnz[aj[i]]++; 2062 } 2063 /* compute local off-diagonal contributions */ 2064 ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr); 2065 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2066 /* map those to global */ 2067 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2068 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2069 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2070 ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr); 2071 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2072 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2073 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2074 2075 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2076 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2077 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 2078 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2079 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2080 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2081 } else { 2082 B = *matout; 2083 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2084 } 2085 2086 b = (Mat_MPIAIJ*)B->data; 2087 A_diag = a->A; 2088 B_diag = &b->A; 2089 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 2090 A_diag_ncol = A_diag->cmap->N; 2091 B_diag_ilen = sub_B_diag->ilen; 2092 B_diag_i = sub_B_diag->i; 2093 2094 /* Set ilen for diagonal of B */ 2095 for (i=0; i<A_diag_ncol; i++) { 2096 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 2097 } 2098 2099 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 2100 very quickly (=without using MatSetValues), because all writes are local. */ 2101 ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr); 2102 2103 /* copy over the B part */ 2104 ierr = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr); 2105 array = Bloc->a; 2106 row = A->rmap->rstart; 2107 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2108 cols_tmp = cols; 2109 for (i=0; i<mb; i++) { 2110 ncol = bi[i+1]-bi[i]; 2111 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2112 row++; 2113 array += ncol; cols_tmp += ncol; 2114 } 2115 ierr = PetscFree(cols);CHKERRQ(ierr); 2116 2117 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2118 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2119 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 2120 *matout = B; 2121 } else { 2122 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2123 } 2124 PetscFunctionReturn(0); 2125 } 2126 2127 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2128 { 2129 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2130 Mat a = aij->A,b = aij->B; 2131 PetscErrorCode ierr; 2132 PetscInt s1,s2,s3; 2133 2134 PetscFunctionBegin; 2135 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2136 if (rr) { 2137 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2138 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2139 /* Overlap communication with computation. */ 2140 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2141 } 2142 if (ll) { 2143 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2144 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2145 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2146 } 2147 /* scale the diagonal block */ 2148 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2149 2150 if (rr) { 2151 /* Do a scatter end and then right scale the off-diagonal block */ 2152 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2153 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2154 } 2155 PetscFunctionReturn(0); 2156 } 2157 2158 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2159 { 2160 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2161 PetscErrorCode ierr; 2162 2163 PetscFunctionBegin; 2164 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2165 PetscFunctionReturn(0); 2166 } 2167 2168 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2169 { 2170 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2171 Mat a,b,c,d; 2172 PetscBool flg; 2173 PetscErrorCode ierr; 2174 2175 PetscFunctionBegin; 2176 a = matA->A; b = matA->B; 2177 c = matB->A; d = matB->B; 2178 2179 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2180 if (flg) { 2181 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2182 } 2183 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2184 PetscFunctionReturn(0); 2185 } 2186 2187 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2188 { 2189 PetscErrorCode ierr; 2190 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2191 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2192 2193 PetscFunctionBegin; 2194 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2195 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2196 /* because of the column compression in the off-processor part of the matrix a->B, 2197 the number of columns in a->B and b->B may be different, hence we cannot call 2198 the MatCopy() directly on the two parts. If need be, we can provide a more 2199 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2200 then copying the submatrices */ 2201 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2202 } else { 2203 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2204 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2205 } 2206 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2207 PetscFunctionReturn(0); 2208 } 2209 2210 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2211 { 2212 PetscErrorCode ierr; 2213 2214 PetscFunctionBegin; 2215 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2216 PetscFunctionReturn(0); 2217 } 2218 2219 /* 2220 Computes the number of nonzeros per row needed for preallocation when X and Y 2221 have different nonzero structure. 2222 */ 2223 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2224 { 2225 PetscInt i,j,k,nzx,nzy; 2226 2227 PetscFunctionBegin; 2228 /* Set the number of nonzeros in the new matrix */ 2229 for (i=0; i<m; i++) { 2230 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2231 nzx = xi[i+1] - xi[i]; 2232 nzy = yi[i+1] - yi[i]; 2233 nnz[i] = 0; 2234 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2235 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2236 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2237 nnz[i]++; 2238 } 2239 for (; k<nzy; k++) nnz[i]++; 2240 } 2241 PetscFunctionReturn(0); 2242 } 2243 2244 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2245 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2246 { 2247 PetscErrorCode ierr; 2248 PetscInt m = Y->rmap->N; 2249 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2250 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2251 2252 PetscFunctionBegin; 2253 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2254 PetscFunctionReturn(0); 2255 } 2256 2257 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2258 { 2259 PetscErrorCode ierr; 2260 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2261 PetscBLASInt bnz,one=1; 2262 Mat_SeqAIJ *x,*y; 2263 2264 PetscFunctionBegin; 2265 if (str == SAME_NONZERO_PATTERN) { 2266 PetscScalar alpha = a; 2267 x = (Mat_SeqAIJ*)xx->A->data; 2268 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2269 y = (Mat_SeqAIJ*)yy->A->data; 2270 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2271 x = (Mat_SeqAIJ*)xx->B->data; 2272 y = (Mat_SeqAIJ*)yy->B->data; 2273 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2274 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2275 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2276 /* the MatAXPY_Basic* subroutines calls MatAssembly, so the matrix on the GPU 2277 will be updated */ 2278 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 2279 if (Y->offloadmask != PETSC_OFFLOAD_UNALLOCATED) { 2280 Y->offloadmask = PETSC_OFFLOAD_CPU; 2281 } 2282 #endif 2283 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2284 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2285 } else { 2286 Mat B; 2287 PetscInt *nnz_d,*nnz_o; 2288 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2289 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2290 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2291 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2292 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2293 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2294 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2295 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2296 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2297 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2298 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2299 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2300 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2301 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2302 } 2303 PetscFunctionReturn(0); 2304 } 2305 2306 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2307 2308 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2309 { 2310 #if defined(PETSC_USE_COMPLEX) 2311 PetscErrorCode ierr; 2312 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2313 2314 PetscFunctionBegin; 2315 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2316 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2317 #else 2318 PetscFunctionBegin; 2319 #endif 2320 PetscFunctionReturn(0); 2321 } 2322 2323 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2324 { 2325 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2326 PetscErrorCode ierr; 2327 2328 PetscFunctionBegin; 2329 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2330 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2331 PetscFunctionReturn(0); 2332 } 2333 2334 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2335 { 2336 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2337 PetscErrorCode ierr; 2338 2339 PetscFunctionBegin; 2340 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2341 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2342 PetscFunctionReturn(0); 2343 } 2344 2345 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2346 { 2347 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2348 PetscErrorCode ierr; 2349 PetscInt i,*idxb = 0; 2350 PetscScalar *va,*vb; 2351 Vec vtmp; 2352 2353 PetscFunctionBegin; 2354 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2355 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2356 if (idx) { 2357 for (i=0; i<A->rmap->n; i++) { 2358 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2359 } 2360 } 2361 2362 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2363 if (idx) { 2364 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2365 } 2366 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2367 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2368 2369 for (i=0; i<A->rmap->n; i++) { 2370 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2371 va[i] = vb[i]; 2372 if (idx) idx[i] = a->garray[idxb[i]]; 2373 } 2374 } 2375 2376 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2377 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2378 ierr = PetscFree(idxb);CHKERRQ(ierr); 2379 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2380 PetscFunctionReturn(0); 2381 } 2382 2383 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2384 { 2385 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2386 PetscErrorCode ierr; 2387 PetscInt i,*idxb = 0; 2388 PetscScalar *va,*vb; 2389 Vec vtmp; 2390 2391 PetscFunctionBegin; 2392 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2393 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2394 if (idx) { 2395 for (i=0; i<A->cmap->n; i++) { 2396 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2397 } 2398 } 2399 2400 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2401 if (idx) { 2402 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2403 } 2404 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2405 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2406 2407 for (i=0; i<A->rmap->n; i++) { 2408 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2409 va[i] = vb[i]; 2410 if (idx) idx[i] = a->garray[idxb[i]]; 2411 } 2412 } 2413 2414 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2415 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2416 ierr = PetscFree(idxb);CHKERRQ(ierr); 2417 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2418 PetscFunctionReturn(0); 2419 } 2420 2421 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2422 { 2423 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2424 PetscInt n = A->rmap->n; 2425 PetscInt cstart = A->cmap->rstart; 2426 PetscInt *cmap = mat->garray; 2427 PetscInt *diagIdx, *offdiagIdx; 2428 Vec diagV, offdiagV; 2429 PetscScalar *a, *diagA, *offdiagA; 2430 PetscInt r; 2431 PetscErrorCode ierr; 2432 2433 PetscFunctionBegin; 2434 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2435 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2436 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 2437 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2438 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2439 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2440 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2441 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2442 for (r = 0; r < n; ++r) { 2443 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2444 a[r] = diagA[r]; 2445 idx[r] = cstart + diagIdx[r]; 2446 } else { 2447 a[r] = offdiagA[r]; 2448 idx[r] = cmap[offdiagIdx[r]]; 2449 } 2450 } 2451 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2452 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2453 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2454 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2455 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2456 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2457 PetscFunctionReturn(0); 2458 } 2459 2460 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2461 { 2462 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2463 PetscInt n = A->rmap->n; 2464 PetscInt cstart = A->cmap->rstart; 2465 PetscInt *cmap = mat->garray; 2466 PetscInt *diagIdx, *offdiagIdx; 2467 Vec diagV, offdiagV; 2468 PetscScalar *a, *diagA, *offdiagA; 2469 PetscInt r; 2470 PetscErrorCode ierr; 2471 2472 PetscFunctionBegin; 2473 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2474 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2475 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2476 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2477 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2478 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2479 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2480 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2481 for (r = 0; r < n; ++r) { 2482 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2483 a[r] = diagA[r]; 2484 idx[r] = cstart + diagIdx[r]; 2485 } else { 2486 a[r] = offdiagA[r]; 2487 idx[r] = cmap[offdiagIdx[r]]; 2488 } 2489 } 2490 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2491 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2492 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2493 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2494 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2495 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2496 PetscFunctionReturn(0); 2497 } 2498 2499 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2500 { 2501 PetscErrorCode ierr; 2502 Mat *dummy; 2503 2504 PetscFunctionBegin; 2505 ierr = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2506 *newmat = *dummy; 2507 ierr = PetscFree(dummy);CHKERRQ(ierr); 2508 PetscFunctionReturn(0); 2509 } 2510 2511 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2512 { 2513 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2514 PetscErrorCode ierr; 2515 2516 PetscFunctionBegin; 2517 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2518 A->factorerrortype = a->A->factorerrortype; 2519 PetscFunctionReturn(0); 2520 } 2521 2522 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2523 { 2524 PetscErrorCode ierr; 2525 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2526 2527 PetscFunctionBegin; 2528 if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2529 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2530 if (x->assembled) { 2531 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2532 } else { 2533 ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr); 2534 } 2535 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2536 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2537 PetscFunctionReturn(0); 2538 } 2539 2540 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2541 { 2542 PetscFunctionBegin; 2543 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2544 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2545 PetscFunctionReturn(0); 2546 } 2547 2548 /*@ 2549 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2550 2551 Collective on Mat 2552 2553 Input Parameters: 2554 + A - the matrix 2555 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2556 2557 Level: advanced 2558 2559 @*/ 2560 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2561 { 2562 PetscErrorCode ierr; 2563 2564 PetscFunctionBegin; 2565 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2566 PetscFunctionReturn(0); 2567 } 2568 2569 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2570 { 2571 PetscErrorCode ierr; 2572 PetscBool sc = PETSC_FALSE,flg; 2573 2574 PetscFunctionBegin; 2575 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2576 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2577 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2578 if (flg) { 2579 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2580 } 2581 ierr = PetscOptionsTail();CHKERRQ(ierr); 2582 PetscFunctionReturn(0); 2583 } 2584 2585 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2586 { 2587 PetscErrorCode ierr; 2588 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2589 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2590 2591 PetscFunctionBegin; 2592 if (!Y->preallocated) { 2593 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2594 } else if (!aij->nz) { 2595 PetscInt nonew = aij->nonew; 2596 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2597 aij->nonew = nonew; 2598 } 2599 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2600 PetscFunctionReturn(0); 2601 } 2602 2603 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2604 { 2605 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2606 PetscErrorCode ierr; 2607 2608 PetscFunctionBegin; 2609 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2610 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2611 if (d) { 2612 PetscInt rstart; 2613 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2614 *d += rstart; 2615 2616 } 2617 PetscFunctionReturn(0); 2618 } 2619 2620 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag) 2621 { 2622 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2623 PetscErrorCode ierr; 2624 2625 PetscFunctionBegin; 2626 ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr); 2627 PetscFunctionReturn(0); 2628 } 2629 2630 /* -------------------------------------------------------------------*/ 2631 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2632 MatGetRow_MPIAIJ, 2633 MatRestoreRow_MPIAIJ, 2634 MatMult_MPIAIJ, 2635 /* 4*/ MatMultAdd_MPIAIJ, 2636 MatMultTranspose_MPIAIJ, 2637 MatMultTransposeAdd_MPIAIJ, 2638 0, 2639 0, 2640 0, 2641 /*10*/ 0, 2642 0, 2643 0, 2644 MatSOR_MPIAIJ, 2645 MatTranspose_MPIAIJ, 2646 /*15*/ MatGetInfo_MPIAIJ, 2647 MatEqual_MPIAIJ, 2648 MatGetDiagonal_MPIAIJ, 2649 MatDiagonalScale_MPIAIJ, 2650 MatNorm_MPIAIJ, 2651 /*20*/ MatAssemblyBegin_MPIAIJ, 2652 MatAssemblyEnd_MPIAIJ, 2653 MatSetOption_MPIAIJ, 2654 MatZeroEntries_MPIAIJ, 2655 /*24*/ MatZeroRows_MPIAIJ, 2656 0, 2657 0, 2658 0, 2659 0, 2660 /*29*/ MatSetUp_MPIAIJ, 2661 0, 2662 0, 2663 MatGetDiagonalBlock_MPIAIJ, 2664 0, 2665 /*34*/ MatDuplicate_MPIAIJ, 2666 0, 2667 0, 2668 0, 2669 0, 2670 /*39*/ MatAXPY_MPIAIJ, 2671 MatCreateSubMatrices_MPIAIJ, 2672 MatIncreaseOverlap_MPIAIJ, 2673 MatGetValues_MPIAIJ, 2674 MatCopy_MPIAIJ, 2675 /*44*/ MatGetRowMax_MPIAIJ, 2676 MatScale_MPIAIJ, 2677 MatShift_MPIAIJ, 2678 MatDiagonalSet_MPIAIJ, 2679 MatZeroRowsColumns_MPIAIJ, 2680 /*49*/ MatSetRandom_MPIAIJ, 2681 0, 2682 0, 2683 0, 2684 0, 2685 /*54*/ MatFDColoringCreate_MPIXAIJ, 2686 0, 2687 MatSetUnfactored_MPIAIJ, 2688 MatPermute_MPIAIJ, 2689 0, 2690 /*59*/ MatCreateSubMatrix_MPIAIJ, 2691 MatDestroy_MPIAIJ, 2692 MatView_MPIAIJ, 2693 0, 2694 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2695 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 2696 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2697 0, 2698 0, 2699 0, 2700 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2701 MatGetRowMinAbs_MPIAIJ, 2702 0, 2703 0, 2704 0, 2705 0, 2706 /*75*/ MatFDColoringApply_AIJ, 2707 MatSetFromOptions_MPIAIJ, 2708 0, 2709 0, 2710 MatFindZeroDiagonals_MPIAIJ, 2711 /*80*/ 0, 2712 0, 2713 0, 2714 /*83*/ MatLoad_MPIAIJ, 2715 MatIsSymmetric_MPIAIJ, 2716 0, 2717 0, 2718 0, 2719 0, 2720 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2721 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2722 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2723 MatPtAP_MPIAIJ_MPIAIJ, 2724 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2725 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2726 0, 2727 0, 2728 0, 2729 MatPinToCPU_MPIAIJ, 2730 /*99*/ 0, 2731 0, 2732 0, 2733 MatConjugate_MPIAIJ, 2734 0, 2735 /*104*/MatSetValuesRow_MPIAIJ, 2736 MatRealPart_MPIAIJ, 2737 MatImaginaryPart_MPIAIJ, 2738 0, 2739 0, 2740 /*109*/0, 2741 0, 2742 MatGetRowMin_MPIAIJ, 2743 0, 2744 MatMissingDiagonal_MPIAIJ, 2745 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2746 0, 2747 MatGetGhosts_MPIAIJ, 2748 0, 2749 0, 2750 /*119*/0, 2751 0, 2752 0, 2753 0, 2754 MatGetMultiProcBlock_MPIAIJ, 2755 /*124*/MatFindNonzeroRows_MPIAIJ, 2756 MatGetColumnNorms_MPIAIJ, 2757 MatInvertBlockDiagonal_MPIAIJ, 2758 MatInvertVariableBlockDiagonal_MPIAIJ, 2759 MatCreateSubMatricesMPI_MPIAIJ, 2760 /*129*/0, 2761 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2762 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2763 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2764 0, 2765 /*134*/0, 2766 0, 2767 MatRARt_MPIAIJ_MPIAIJ, 2768 0, 2769 0, 2770 /*139*/MatSetBlockSizes_MPIAIJ, 2771 0, 2772 0, 2773 MatFDColoringSetUp_MPIXAIJ, 2774 MatFindOffBlockDiagonalEntries_MPIAIJ, 2775 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ 2776 }; 2777 2778 /* ----------------------------------------------------------------------------------------*/ 2779 2780 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2781 { 2782 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2783 PetscErrorCode ierr; 2784 2785 PetscFunctionBegin; 2786 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2787 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2788 PetscFunctionReturn(0); 2789 } 2790 2791 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2792 { 2793 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2794 PetscErrorCode ierr; 2795 2796 PetscFunctionBegin; 2797 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2798 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2799 PetscFunctionReturn(0); 2800 } 2801 2802 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2803 { 2804 Mat_MPIAIJ *b; 2805 PetscErrorCode ierr; 2806 PetscMPIInt size; 2807 2808 PetscFunctionBegin; 2809 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2810 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2811 b = (Mat_MPIAIJ*)B->data; 2812 2813 #if defined(PETSC_USE_CTABLE) 2814 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2815 #else 2816 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2817 #endif 2818 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2819 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2820 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2821 2822 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2823 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 2824 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2825 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2826 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr); 2827 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2828 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2829 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2830 2831 if (!B->preallocated) { 2832 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2833 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2834 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2835 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2836 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2837 } 2838 2839 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2840 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2841 B->preallocated = PETSC_TRUE; 2842 B->was_assembled = PETSC_FALSE; 2843 B->assembled = PETSC_FALSE; 2844 PetscFunctionReturn(0); 2845 } 2846 2847 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2848 { 2849 Mat_MPIAIJ *b; 2850 PetscErrorCode ierr; 2851 2852 PetscFunctionBegin; 2853 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2854 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2855 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2856 b = (Mat_MPIAIJ*)B->data; 2857 2858 #if defined(PETSC_USE_CTABLE) 2859 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2860 #else 2861 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2862 #endif 2863 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2864 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2865 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2866 2867 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2868 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2869 B->preallocated = PETSC_TRUE; 2870 B->was_assembled = PETSC_FALSE; 2871 B->assembled = PETSC_FALSE; 2872 PetscFunctionReturn(0); 2873 } 2874 2875 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2876 { 2877 Mat mat; 2878 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2879 PetscErrorCode ierr; 2880 2881 PetscFunctionBegin; 2882 *newmat = 0; 2883 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2884 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2885 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2886 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2887 a = (Mat_MPIAIJ*)mat->data; 2888 2889 mat->factortype = matin->factortype; 2890 mat->assembled = PETSC_TRUE; 2891 mat->insertmode = NOT_SET_VALUES; 2892 mat->preallocated = PETSC_TRUE; 2893 2894 a->size = oldmat->size; 2895 a->rank = oldmat->rank; 2896 a->donotstash = oldmat->donotstash; 2897 a->roworiented = oldmat->roworiented; 2898 a->rowindices = 0; 2899 a->rowvalues = 0; 2900 a->getrowactive = PETSC_FALSE; 2901 2902 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2903 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2904 2905 if (oldmat->colmap) { 2906 #if defined(PETSC_USE_CTABLE) 2907 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2908 #else 2909 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2910 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2911 ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr); 2912 #endif 2913 } else a->colmap = 0; 2914 if (oldmat->garray) { 2915 PetscInt len; 2916 len = oldmat->B->cmap->n; 2917 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2918 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2919 if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); } 2920 } else a->garray = 0; 2921 2922 /* It may happen MatDuplicate is called with a non-assembled matrix 2923 In fact, MatDuplicate only requires the matrix to be preallocated 2924 This may happen inside a DMCreateMatrix_Shell */ 2925 if (oldmat->lvec) { 2926 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2927 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2928 } 2929 if (oldmat->Mvctx) { 2930 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2931 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2932 } 2933 if (oldmat->Mvctx_mpi1) { 2934 ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr); 2935 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr); 2936 } 2937 2938 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2939 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2940 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2941 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2942 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2943 *newmat = mat; 2944 PetscFunctionReturn(0); 2945 } 2946 2947 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2948 { 2949 PetscBool isbinary, ishdf5; 2950 PetscErrorCode ierr; 2951 2952 PetscFunctionBegin; 2953 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 2954 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 2955 /* force binary viewer to load .info file if it has not yet done so */ 2956 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2957 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 2958 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr); 2959 if (isbinary) { 2960 ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr); 2961 } else if (ishdf5) { 2962 #if defined(PETSC_HAVE_HDF5) 2963 ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr); 2964 #else 2965 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 2966 #endif 2967 } else { 2968 SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 2969 } 2970 PetscFunctionReturn(0); 2971 } 2972 2973 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat newMat, PetscViewer viewer) 2974 { 2975 PetscScalar *vals,*svals; 2976 MPI_Comm comm; 2977 PetscErrorCode ierr; 2978 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2979 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2980 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2981 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2982 PetscInt cend,cstart,n,*rowners; 2983 int fd; 2984 PetscInt bs = newMat->rmap->bs; 2985 2986 PetscFunctionBegin; 2987 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2988 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2989 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2990 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2991 if (!rank) { 2992 ierr = PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);CHKERRQ(ierr); 2993 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2994 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ"); 2995 } 2996 2997 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr); 2998 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2999 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3000 if (bs < 0) bs = 1; 3001 3002 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3003 M = header[1]; N = header[2]; 3004 3005 /* If global sizes are set, check if they are consistent with that given in the file */ 3006 if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M); 3007 if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N); 3008 3009 /* determine ownership of all (block) rows */ 3010 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3011 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3012 else m = newMat->rmap->n; /* Set by user */ 3013 3014 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 3015 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3016 3017 /* First process needs enough room for process with most rows */ 3018 if (!rank) { 3019 mmax = rowners[1]; 3020 for (i=2; i<=size; i++) { 3021 mmax = PetscMax(mmax, rowners[i]); 3022 } 3023 } else mmax = -1; /* unused, but compilers complain */ 3024 3025 rowners[0] = 0; 3026 for (i=2; i<=size; i++) { 3027 rowners[i] += rowners[i-1]; 3028 } 3029 rstart = rowners[rank]; 3030 rend = rowners[rank+1]; 3031 3032 /* distribute row lengths to all processors */ 3033 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 3034 if (!rank) { 3035 ierr = PetscBinaryRead(fd,ourlens,m,NULL,PETSC_INT);CHKERRQ(ierr); 3036 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 3037 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 3038 for (j=0; j<m; j++) { 3039 procsnz[0] += ourlens[j]; 3040 } 3041 for (i=1; i<size; i++) { 3042 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],NULL,PETSC_INT);CHKERRQ(ierr); 3043 /* calculate the number of nonzeros on each processor */ 3044 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3045 procsnz[i] += rowlengths[j]; 3046 } 3047 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3048 } 3049 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3050 } else { 3051 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3052 } 3053 3054 if (!rank) { 3055 /* determine max buffer needed and allocate it */ 3056 maxnz = 0; 3057 for (i=0; i<size; i++) { 3058 maxnz = PetscMax(maxnz,procsnz[i]); 3059 } 3060 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 3061 3062 /* read in my part of the matrix column indices */ 3063 nz = procsnz[0]; 3064 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3065 ierr = PetscBinaryRead(fd,mycols,nz,NULL,PETSC_INT);CHKERRQ(ierr); 3066 3067 /* read in every one elses and ship off */ 3068 for (i=1; i<size; i++) { 3069 nz = procsnz[i]; 3070 ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr); 3071 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3072 } 3073 ierr = PetscFree(cols);CHKERRQ(ierr); 3074 } else { 3075 /* determine buffer space needed for message */ 3076 nz = 0; 3077 for (i=0; i<m; i++) { 3078 nz += ourlens[i]; 3079 } 3080 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3081 3082 /* receive message of column indices*/ 3083 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3084 } 3085 3086 /* determine column ownership if matrix is not square */ 3087 if (N != M) { 3088 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3089 else n = newMat->cmap->n; 3090 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3091 cstart = cend - n; 3092 } else { 3093 cstart = rstart; 3094 cend = rend; 3095 n = cend - cstart; 3096 } 3097 3098 /* loop over local rows, determining number of off diagonal entries */ 3099 ierr = PetscArrayzero(offlens,m);CHKERRQ(ierr); 3100 jj = 0; 3101 for (i=0; i<m; i++) { 3102 for (j=0; j<ourlens[i]; j++) { 3103 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3104 jj++; 3105 } 3106 } 3107 3108 for (i=0; i<m; i++) { 3109 ourlens[i] -= offlens[i]; 3110 } 3111 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3112 3113 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3114 3115 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3116 3117 for (i=0; i<m; i++) { 3118 ourlens[i] += offlens[i]; 3119 } 3120 3121 if (!rank) { 3122 ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr); 3123 3124 /* read in my part of the matrix numerical values */ 3125 nz = procsnz[0]; 3126 ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr); 3127 3128 /* insert into matrix */ 3129 jj = rstart; 3130 smycols = mycols; 3131 svals = vals; 3132 for (i=0; i<m; i++) { 3133 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3134 smycols += ourlens[i]; 3135 svals += ourlens[i]; 3136 jj++; 3137 } 3138 3139 /* read in other processors and ship out */ 3140 for (i=1; i<size; i++) { 3141 nz = procsnz[i]; 3142 ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr); 3143 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3144 } 3145 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3146 } else { 3147 /* receive numeric values */ 3148 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 3149 3150 /* receive message of values*/ 3151 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3152 3153 /* insert into matrix */ 3154 jj = rstart; 3155 smycols = mycols; 3156 svals = vals; 3157 for (i=0; i<m; i++) { 3158 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3159 smycols += ourlens[i]; 3160 svals += ourlens[i]; 3161 jj++; 3162 } 3163 } 3164 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3165 ierr = PetscFree(vals);CHKERRQ(ierr); 3166 ierr = PetscFree(mycols);CHKERRQ(ierr); 3167 ierr = PetscFree(rowners);CHKERRQ(ierr); 3168 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3169 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3170 PetscFunctionReturn(0); 3171 } 3172 3173 /* Not scalable because of ISAllGather() unless getting all columns. */ 3174 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3175 { 3176 PetscErrorCode ierr; 3177 IS iscol_local; 3178 PetscBool isstride; 3179 PetscMPIInt lisstride=0,gisstride; 3180 3181 PetscFunctionBegin; 3182 /* check if we are grabbing all columns*/ 3183 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3184 3185 if (isstride) { 3186 PetscInt start,len,mstart,mlen; 3187 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3188 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3189 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3190 if (mstart == start && mlen-mstart == len) lisstride = 1; 3191 } 3192 3193 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3194 if (gisstride) { 3195 PetscInt N; 3196 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3197 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr); 3198 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3199 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3200 } else { 3201 PetscInt cbs; 3202 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3203 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3204 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3205 } 3206 3207 *isseq = iscol_local; 3208 PetscFunctionReturn(0); 3209 } 3210 3211 /* 3212 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3213 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3214 3215 Input Parameters: 3216 mat - matrix 3217 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3218 i.e., mat->rstart <= isrow[i] < mat->rend 3219 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3220 i.e., mat->cstart <= iscol[i] < mat->cend 3221 Output Parameter: 3222 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3223 iscol_o - sequential column index set for retrieving mat->B 3224 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3225 */ 3226 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3227 { 3228 PetscErrorCode ierr; 3229 Vec x,cmap; 3230 const PetscInt *is_idx; 3231 PetscScalar *xarray,*cmaparray; 3232 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3233 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3234 Mat B=a->B; 3235 Vec lvec=a->lvec,lcmap; 3236 PetscInt i,cstart,cend,Bn=B->cmap->N; 3237 MPI_Comm comm; 3238 VecScatter Mvctx=a->Mvctx; 3239 3240 PetscFunctionBegin; 3241 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3242 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3243 3244 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */ 3245 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3246 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3247 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3248 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3249 3250 /* Get start indices */ 3251 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3252 isstart -= ncols; 3253 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3254 3255 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3256 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3257 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3258 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3259 for (i=0; i<ncols; i++) { 3260 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3261 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3262 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3263 } 3264 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3265 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3266 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3267 3268 /* Get iscol_d */ 3269 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3270 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3271 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3272 3273 /* Get isrow_d */ 3274 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3275 rstart = mat->rmap->rstart; 3276 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3277 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3278 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3279 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3280 3281 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3282 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3283 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3284 3285 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3286 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3287 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3288 3289 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3290 3291 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3292 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3293 3294 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3295 /* off-process column indices */ 3296 count = 0; 3297 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3298 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3299 3300 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3301 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3302 for (i=0; i<Bn; i++) { 3303 if (PetscRealPart(xarray[i]) > -1.0) { 3304 idx[count] = i; /* local column index in off-diagonal part B */ 3305 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3306 count++; 3307 } 3308 } 3309 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3310 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3311 3312 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3313 /* cannot ensure iscol_o has same blocksize as iscol! */ 3314 3315 ierr = PetscFree(idx);CHKERRQ(ierr); 3316 *garray = cmap1; 3317 3318 ierr = VecDestroy(&x);CHKERRQ(ierr); 3319 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3320 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3321 PetscFunctionReturn(0); 3322 } 3323 3324 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3325 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3326 { 3327 PetscErrorCode ierr; 3328 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3329 Mat M = NULL; 3330 MPI_Comm comm; 3331 IS iscol_d,isrow_d,iscol_o; 3332 Mat Asub = NULL,Bsub = NULL; 3333 PetscInt n; 3334 3335 PetscFunctionBegin; 3336 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3337 3338 if (call == MAT_REUSE_MATRIX) { 3339 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3340 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3341 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3342 3343 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3344 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3345 3346 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3347 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3348 3349 /* Update diagonal and off-diagonal portions of submat */ 3350 asub = (Mat_MPIAIJ*)(*submat)->data; 3351 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3352 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3353 if (n) { 3354 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3355 } 3356 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3357 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3358 3359 } else { /* call == MAT_INITIAL_MATRIX) */ 3360 const PetscInt *garray; 3361 PetscInt BsubN; 3362 3363 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */ 3364 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3365 3366 /* Create local submatrices Asub and Bsub */ 3367 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3368 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3369 3370 /* Create submatrix M */ 3371 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3372 3373 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3374 asub = (Mat_MPIAIJ*)M->data; 3375 3376 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3377 n = asub->B->cmap->N; 3378 if (BsubN > n) { 3379 /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */ 3380 const PetscInt *idx; 3381 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3382 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3383 3384 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3385 j = 0; 3386 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3387 for (i=0; i<n; i++) { 3388 if (j >= BsubN) break; 3389 while (subgarray[i] > garray[j]) j++; 3390 3391 if (subgarray[i] == garray[j]) { 3392 idx_new[i] = idx[j++]; 3393 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3394 } 3395 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3396 3397 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3398 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3399 3400 } else if (BsubN < n) { 3401 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3402 } 3403 3404 ierr = PetscFree(garray);CHKERRQ(ierr); 3405 *submat = M; 3406 3407 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3408 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3409 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3410 3411 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3412 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3413 3414 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3415 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3416 } 3417 PetscFunctionReturn(0); 3418 } 3419 3420 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3421 { 3422 PetscErrorCode ierr; 3423 IS iscol_local=NULL,isrow_d; 3424 PetscInt csize; 3425 PetscInt n,i,j,start,end; 3426 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3427 MPI_Comm comm; 3428 3429 PetscFunctionBegin; 3430 /* If isrow has same processor distribution as mat, 3431 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3432 if (call == MAT_REUSE_MATRIX) { 3433 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3434 if (isrow_d) { 3435 sameRowDist = PETSC_TRUE; 3436 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3437 } else { 3438 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3439 if (iscol_local) { 3440 sameRowDist = PETSC_TRUE; 3441 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3442 } 3443 } 3444 } else { 3445 /* Check if isrow has same processor distribution as mat */ 3446 sameDist[0] = PETSC_FALSE; 3447 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3448 if (!n) { 3449 sameDist[0] = PETSC_TRUE; 3450 } else { 3451 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3452 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3453 if (i >= start && j < end) { 3454 sameDist[0] = PETSC_TRUE; 3455 } 3456 } 3457 3458 /* Check if iscol has same processor distribution as mat */ 3459 sameDist[1] = PETSC_FALSE; 3460 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3461 if (!n) { 3462 sameDist[1] = PETSC_TRUE; 3463 } else { 3464 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3465 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3466 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3467 } 3468 3469 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3470 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3471 sameRowDist = tsameDist[0]; 3472 } 3473 3474 if (sameRowDist) { 3475 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3476 /* isrow and iscol have same processor distribution as mat */ 3477 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3478 PetscFunctionReturn(0); 3479 } else { /* sameRowDist */ 3480 /* isrow has same processor distribution as mat */ 3481 if (call == MAT_INITIAL_MATRIX) { 3482 PetscBool sorted; 3483 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3484 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3485 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3486 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3487 3488 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3489 if (sorted) { 3490 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3491 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3492 PetscFunctionReturn(0); 3493 } 3494 } else { /* call == MAT_REUSE_MATRIX */ 3495 IS iscol_sub; 3496 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3497 if (iscol_sub) { 3498 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3499 PetscFunctionReturn(0); 3500 } 3501 } 3502 } 3503 } 3504 3505 /* General case: iscol -> iscol_local which has global size of iscol */ 3506 if (call == MAT_REUSE_MATRIX) { 3507 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3508 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3509 } else { 3510 if (!iscol_local) { 3511 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3512 } 3513 } 3514 3515 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3516 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3517 3518 if (call == MAT_INITIAL_MATRIX) { 3519 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3520 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3521 } 3522 PetscFunctionReturn(0); 3523 } 3524 3525 /*@C 3526 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3527 and "off-diagonal" part of the matrix in CSR format. 3528 3529 Collective 3530 3531 Input Parameters: 3532 + comm - MPI communicator 3533 . A - "diagonal" portion of matrix 3534 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3535 - garray - global index of B columns 3536 3537 Output Parameter: 3538 . mat - the matrix, with input A as its local diagonal matrix 3539 Level: advanced 3540 3541 Notes: 3542 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3543 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3544 3545 .seealso: MatCreateMPIAIJWithSplitArrays() 3546 @*/ 3547 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3548 { 3549 PetscErrorCode ierr; 3550 Mat_MPIAIJ *maij; 3551 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3552 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3553 PetscScalar *oa=b->a; 3554 Mat Bnew; 3555 PetscInt m,n,N; 3556 3557 PetscFunctionBegin; 3558 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3559 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3560 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3561 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3562 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3563 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3564 3565 /* Get global columns of mat */ 3566 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3567 3568 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3569 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3570 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3571 maij = (Mat_MPIAIJ*)(*mat)->data; 3572 3573 (*mat)->preallocated = PETSC_TRUE; 3574 3575 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3576 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3577 3578 /* Set A as diagonal portion of *mat */ 3579 maij->A = A; 3580 3581 nz = oi[m]; 3582 for (i=0; i<nz; i++) { 3583 col = oj[i]; 3584 oj[i] = garray[col]; 3585 } 3586 3587 /* Set Bnew as off-diagonal portion of *mat */ 3588 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3589 bnew = (Mat_SeqAIJ*)Bnew->data; 3590 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3591 maij->B = Bnew; 3592 3593 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3594 3595 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3596 b->free_a = PETSC_FALSE; 3597 b->free_ij = PETSC_FALSE; 3598 ierr = MatDestroy(&B);CHKERRQ(ierr); 3599 3600 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3601 bnew->free_a = PETSC_TRUE; 3602 bnew->free_ij = PETSC_TRUE; 3603 3604 /* condense columns of maij->B */ 3605 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3606 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3607 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3608 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3609 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3610 PetscFunctionReturn(0); 3611 } 3612 3613 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3614 3615 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3616 { 3617 PetscErrorCode ierr; 3618 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3619 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3620 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3621 Mat M,Msub,B=a->B; 3622 MatScalar *aa; 3623 Mat_SeqAIJ *aij; 3624 PetscInt *garray = a->garray,*colsub,Ncols; 3625 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3626 IS iscol_sub,iscmap; 3627 const PetscInt *is_idx,*cmap; 3628 PetscBool allcolumns=PETSC_FALSE; 3629 MPI_Comm comm; 3630 3631 PetscFunctionBegin; 3632 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3633 3634 if (call == MAT_REUSE_MATRIX) { 3635 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3636 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3637 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3638 3639 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3640 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3641 3642 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3643 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3644 3645 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3646 3647 } else { /* call == MAT_INITIAL_MATRIX) */ 3648 PetscBool flg; 3649 3650 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3651 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3652 3653 /* (1) iscol -> nonscalable iscol_local */ 3654 /* Check for special case: each processor gets entire matrix columns */ 3655 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3656 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3657 if (allcolumns) { 3658 iscol_sub = iscol_local; 3659 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3660 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3661 3662 } else { 3663 /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3664 PetscInt *idx,*cmap1,k; 3665 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3666 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3667 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3668 count = 0; 3669 k = 0; 3670 for (i=0; i<Ncols; i++) { 3671 j = is_idx[i]; 3672 if (j >= cstart && j < cend) { 3673 /* diagonal part of mat */ 3674 idx[count] = j; 3675 cmap1[count++] = i; /* column index in submat */ 3676 } else if (Bn) { 3677 /* off-diagonal part of mat */ 3678 if (j == garray[k]) { 3679 idx[count] = j; 3680 cmap1[count++] = i; /* column index in submat */ 3681 } else if (j > garray[k]) { 3682 while (j > garray[k] && k < Bn-1) k++; 3683 if (j == garray[k]) { 3684 idx[count] = j; 3685 cmap1[count++] = i; /* column index in submat */ 3686 } 3687 } 3688 } 3689 } 3690 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3691 3692 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3693 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3694 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3695 3696 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3697 } 3698 3699 /* (3) Create sequential Msub */ 3700 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3701 } 3702 3703 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3704 aij = (Mat_SeqAIJ*)(Msub)->data; 3705 ii = aij->i; 3706 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3707 3708 /* 3709 m - number of local rows 3710 Ncols - number of columns (same on all processors) 3711 rstart - first row in new global matrix generated 3712 */ 3713 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3714 3715 if (call == MAT_INITIAL_MATRIX) { 3716 /* (4) Create parallel newmat */ 3717 PetscMPIInt rank,size; 3718 PetscInt csize; 3719 3720 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3721 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3722 3723 /* 3724 Determine the number of non-zeros in the diagonal and off-diagonal 3725 portions of the matrix in order to do correct preallocation 3726 */ 3727 3728 /* first get start and end of "diagonal" columns */ 3729 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3730 if (csize == PETSC_DECIDE) { 3731 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3732 if (mglobal == Ncols) { /* square matrix */ 3733 nlocal = m; 3734 } else { 3735 nlocal = Ncols/size + ((Ncols % size) > rank); 3736 } 3737 } else { 3738 nlocal = csize; 3739 } 3740 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3741 rstart = rend - nlocal; 3742 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3743 3744 /* next, compute all the lengths */ 3745 jj = aij->j; 3746 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3747 olens = dlens + m; 3748 for (i=0; i<m; i++) { 3749 jend = ii[i+1] - ii[i]; 3750 olen = 0; 3751 dlen = 0; 3752 for (j=0; j<jend; j++) { 3753 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3754 else dlen++; 3755 jj++; 3756 } 3757 olens[i] = olen; 3758 dlens[i] = dlen; 3759 } 3760 3761 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3762 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3763 3764 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3765 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 3766 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3767 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3768 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3769 ierr = PetscFree(dlens);CHKERRQ(ierr); 3770 3771 } else { /* call == MAT_REUSE_MATRIX */ 3772 M = *newmat; 3773 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3774 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3775 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3776 /* 3777 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3778 rather than the slower MatSetValues(). 3779 */ 3780 M->was_assembled = PETSC_TRUE; 3781 M->assembled = PETSC_FALSE; 3782 } 3783 3784 /* (5) Set values of Msub to *newmat */ 3785 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3786 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3787 3788 jj = aij->j; 3789 aa = aij->a; 3790 for (i=0; i<m; i++) { 3791 row = rstart + i; 3792 nz = ii[i+1] - ii[i]; 3793 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3794 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3795 jj += nz; aa += nz; 3796 } 3797 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3798 3799 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3800 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3801 3802 ierr = PetscFree(colsub);CHKERRQ(ierr); 3803 3804 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3805 if (call == MAT_INITIAL_MATRIX) { 3806 *newmat = M; 3807 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3808 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3809 3810 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3811 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3812 3813 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3814 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3815 3816 if (iscol_local) { 3817 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3818 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3819 } 3820 } 3821 PetscFunctionReturn(0); 3822 } 3823 3824 /* 3825 Not great since it makes two copies of the submatrix, first an SeqAIJ 3826 in local and then by concatenating the local matrices the end result. 3827 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3828 3829 Note: This requires a sequential iscol with all indices. 3830 */ 3831 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3832 { 3833 PetscErrorCode ierr; 3834 PetscMPIInt rank,size; 3835 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3836 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3837 Mat M,Mreuse; 3838 MatScalar *aa,*vwork; 3839 MPI_Comm comm; 3840 Mat_SeqAIJ *aij; 3841 PetscBool colflag,allcolumns=PETSC_FALSE; 3842 3843 PetscFunctionBegin; 3844 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3845 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3846 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3847 3848 /* Check for special case: each processor gets entire matrix columns */ 3849 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3850 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3851 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3852 3853 if (call == MAT_REUSE_MATRIX) { 3854 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3855 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3856 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3857 } else { 3858 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3859 } 3860 3861 /* 3862 m - number of local rows 3863 n - number of columns (same on all processors) 3864 rstart - first row in new global matrix generated 3865 */ 3866 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3867 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3868 if (call == MAT_INITIAL_MATRIX) { 3869 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3870 ii = aij->i; 3871 jj = aij->j; 3872 3873 /* 3874 Determine the number of non-zeros in the diagonal and off-diagonal 3875 portions of the matrix in order to do correct preallocation 3876 */ 3877 3878 /* first get start and end of "diagonal" columns */ 3879 if (csize == PETSC_DECIDE) { 3880 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3881 if (mglobal == n) { /* square matrix */ 3882 nlocal = m; 3883 } else { 3884 nlocal = n/size + ((n % size) > rank); 3885 } 3886 } else { 3887 nlocal = csize; 3888 } 3889 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3890 rstart = rend - nlocal; 3891 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3892 3893 /* next, compute all the lengths */ 3894 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3895 olens = dlens + m; 3896 for (i=0; i<m; i++) { 3897 jend = ii[i+1] - ii[i]; 3898 olen = 0; 3899 dlen = 0; 3900 for (j=0; j<jend; j++) { 3901 if (*jj < rstart || *jj >= rend) olen++; 3902 else dlen++; 3903 jj++; 3904 } 3905 olens[i] = olen; 3906 dlens[i] = dlen; 3907 } 3908 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3909 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3910 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3911 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3912 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3913 ierr = PetscFree(dlens);CHKERRQ(ierr); 3914 } else { 3915 PetscInt ml,nl; 3916 3917 M = *newmat; 3918 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3919 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3920 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3921 /* 3922 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3923 rather than the slower MatSetValues(). 3924 */ 3925 M->was_assembled = PETSC_TRUE; 3926 M->assembled = PETSC_FALSE; 3927 } 3928 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3929 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3930 ii = aij->i; 3931 jj = aij->j; 3932 aa = aij->a; 3933 for (i=0; i<m; i++) { 3934 row = rstart + i; 3935 nz = ii[i+1] - ii[i]; 3936 cwork = jj; jj += nz; 3937 vwork = aa; aa += nz; 3938 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3939 } 3940 3941 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3942 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3943 *newmat = M; 3944 3945 /* save submatrix used in processor for next request */ 3946 if (call == MAT_INITIAL_MATRIX) { 3947 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3948 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3949 } 3950 PetscFunctionReturn(0); 3951 } 3952 3953 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3954 { 3955 PetscInt m,cstart, cend,j,nnz,i,d; 3956 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3957 const PetscInt *JJ; 3958 PetscErrorCode ierr; 3959 PetscBool nooffprocentries; 3960 3961 PetscFunctionBegin; 3962 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3963 3964 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3965 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3966 m = B->rmap->n; 3967 cstart = B->cmap->rstart; 3968 cend = B->cmap->rend; 3969 rstart = B->rmap->rstart; 3970 3971 ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3972 3973 #if defined(PETSC_USE_DEBUG) 3974 for (i=0; i<m; i++) { 3975 nnz = Ii[i+1]- Ii[i]; 3976 JJ = J + Ii[i]; 3977 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3978 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]); 3979 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3980 } 3981 #endif 3982 3983 for (i=0; i<m; i++) { 3984 nnz = Ii[i+1]- Ii[i]; 3985 JJ = J + Ii[i]; 3986 nnz_max = PetscMax(nnz_max,nnz); 3987 d = 0; 3988 for (j=0; j<nnz; j++) { 3989 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3990 } 3991 d_nnz[i] = d; 3992 o_nnz[i] = nnz - d; 3993 } 3994 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3995 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3996 3997 for (i=0; i<m; i++) { 3998 ii = i + rstart; 3999 ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr); 4000 } 4001 nooffprocentries = B->nooffprocentries; 4002 B->nooffprocentries = PETSC_TRUE; 4003 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4004 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4005 B->nooffprocentries = nooffprocentries; 4006 4007 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 4008 PetscFunctionReturn(0); 4009 } 4010 4011 /*@ 4012 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 4013 (the default parallel PETSc format). 4014 4015 Collective 4016 4017 Input Parameters: 4018 + B - the matrix 4019 . i - the indices into j for the start of each local row (starts with zero) 4020 . j - the column indices for each local row (starts with zero) 4021 - v - optional values in the matrix 4022 4023 Level: developer 4024 4025 Notes: 4026 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 4027 thus you CANNOT change the matrix entries by changing the values of v[] after you have 4028 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4029 4030 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4031 4032 The format which is used for the sparse matrix input, is equivalent to a 4033 row-major ordering.. i.e for the following matrix, the input data expected is 4034 as shown 4035 4036 $ 1 0 0 4037 $ 2 0 3 P0 4038 $ ------- 4039 $ 4 5 6 P1 4040 $ 4041 $ Process0 [P0]: rows_owned=[0,1] 4042 $ i = {0,1,3} [size = nrow+1 = 2+1] 4043 $ j = {0,0,2} [size = 3] 4044 $ v = {1,2,3} [size = 3] 4045 $ 4046 $ Process1 [P1]: rows_owned=[2] 4047 $ i = {0,3} [size = nrow+1 = 1+1] 4048 $ j = {0,1,2} [size = 3] 4049 $ v = {4,5,6} [size = 3] 4050 4051 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 4052 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 4053 @*/ 4054 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 4055 { 4056 PetscErrorCode ierr; 4057 4058 PetscFunctionBegin; 4059 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 4060 PetscFunctionReturn(0); 4061 } 4062 4063 /*@C 4064 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 4065 (the default parallel PETSc format). For good matrix assembly performance 4066 the user should preallocate the matrix storage by setting the parameters 4067 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4068 performance can be increased by more than a factor of 50. 4069 4070 Collective 4071 4072 Input Parameters: 4073 + B - the matrix 4074 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4075 (same value is used for all local rows) 4076 . d_nnz - array containing the number of nonzeros in the various rows of the 4077 DIAGONAL portion of the local submatrix (possibly different for each row) 4078 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 4079 The size of this array is equal to the number of local rows, i.e 'm'. 4080 For matrices that will be factored, you must leave room for (and set) 4081 the diagonal entry even if it is zero. 4082 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4083 submatrix (same value is used for all local rows). 4084 - o_nnz - array containing the number of nonzeros in the various rows of the 4085 OFF-DIAGONAL portion of the local submatrix (possibly different for 4086 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 4087 structure. The size of this array is equal to the number 4088 of local rows, i.e 'm'. 4089 4090 If the *_nnz parameter is given then the *_nz parameter is ignored 4091 4092 The AIJ format (also called the Yale sparse matrix format or 4093 compressed row storage (CSR)), is fully compatible with standard Fortran 77 4094 storage. The stored row and column indices begin with zero. 4095 See Users-Manual: ch_mat for details. 4096 4097 The parallel matrix is partitioned such that the first m0 rows belong to 4098 process 0, the next m1 rows belong to process 1, the next m2 rows belong 4099 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 4100 4101 The DIAGONAL portion of the local submatrix of a processor can be defined 4102 as the submatrix which is obtained by extraction the part corresponding to 4103 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 4104 first row that belongs to the processor, r2 is the last row belonging to 4105 the this processor, and c1-c2 is range of indices of the local part of a 4106 vector suitable for applying the matrix to. This is an mxn matrix. In the 4107 common case of a square matrix, the row and column ranges are the same and 4108 the DIAGONAL part is also square. The remaining portion of the local 4109 submatrix (mxN) constitute the OFF-DIAGONAL portion. 4110 4111 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4112 4113 You can call MatGetInfo() to get information on how effective the preallocation was; 4114 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 4115 You can also run with the option -info and look for messages with the string 4116 malloc in them to see if additional memory allocation was needed. 4117 4118 Example usage: 4119 4120 Consider the following 8x8 matrix with 34 non-zero values, that is 4121 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4122 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4123 as follows: 4124 4125 .vb 4126 1 2 0 | 0 3 0 | 0 4 4127 Proc0 0 5 6 | 7 0 0 | 8 0 4128 9 0 10 | 11 0 0 | 12 0 4129 ------------------------------------- 4130 13 0 14 | 15 16 17 | 0 0 4131 Proc1 0 18 0 | 19 20 21 | 0 0 4132 0 0 0 | 22 23 0 | 24 0 4133 ------------------------------------- 4134 Proc2 25 26 27 | 0 0 28 | 29 0 4135 30 0 0 | 31 32 33 | 0 34 4136 .ve 4137 4138 This can be represented as a collection of submatrices as: 4139 4140 .vb 4141 A B C 4142 D E F 4143 G H I 4144 .ve 4145 4146 Where the submatrices A,B,C are owned by proc0, D,E,F are 4147 owned by proc1, G,H,I are owned by proc2. 4148 4149 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4150 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4151 The 'M','N' parameters are 8,8, and have the same values on all procs. 4152 4153 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4154 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4155 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4156 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4157 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4158 matrix, ans [DF] as another SeqAIJ matrix. 4159 4160 When d_nz, o_nz parameters are specified, d_nz storage elements are 4161 allocated for every row of the local diagonal submatrix, and o_nz 4162 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4163 One way to choose d_nz and o_nz is to use the max nonzerors per local 4164 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4165 In this case, the values of d_nz,o_nz are: 4166 .vb 4167 proc0 : dnz = 2, o_nz = 2 4168 proc1 : dnz = 3, o_nz = 2 4169 proc2 : dnz = 1, o_nz = 4 4170 .ve 4171 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4172 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4173 for proc3. i.e we are using 12+15+10=37 storage locations to store 4174 34 values. 4175 4176 When d_nnz, o_nnz parameters are specified, the storage is specified 4177 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4178 In the above case the values for d_nnz,o_nnz are: 4179 .vb 4180 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4181 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4182 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4183 .ve 4184 Here the space allocated is sum of all the above values i.e 34, and 4185 hence pre-allocation is perfect. 4186 4187 Level: intermediate 4188 4189 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4190 MATMPIAIJ, MatGetInfo(), PetscSplitOwnership() 4191 @*/ 4192 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4193 { 4194 PetscErrorCode ierr; 4195 4196 PetscFunctionBegin; 4197 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4198 PetscValidType(B,1); 4199 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4200 PetscFunctionReturn(0); 4201 } 4202 4203 /*@ 4204 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 4205 CSR format for the local rows. 4206 4207 Collective 4208 4209 Input Parameters: 4210 + comm - MPI communicator 4211 . m - number of local rows (Cannot be PETSC_DECIDE) 4212 . n - This value should be the same as the local size used in creating the 4213 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4214 calculated if N is given) For square matrices n is almost always m. 4215 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4216 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4217 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4218 . j - column indices 4219 - a - matrix values 4220 4221 Output Parameter: 4222 . mat - the matrix 4223 4224 Level: intermediate 4225 4226 Notes: 4227 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4228 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4229 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4230 4231 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4232 4233 The format which is used for the sparse matrix input, is equivalent to a 4234 row-major ordering.. i.e for the following matrix, the input data expected is 4235 as shown 4236 4237 Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays 4238 4239 $ 1 0 0 4240 $ 2 0 3 P0 4241 $ ------- 4242 $ 4 5 6 P1 4243 $ 4244 $ Process0 [P0]: rows_owned=[0,1] 4245 $ i = {0,1,3} [size = nrow+1 = 2+1] 4246 $ j = {0,0,2} [size = 3] 4247 $ v = {1,2,3} [size = 3] 4248 $ 4249 $ Process1 [P1]: rows_owned=[2] 4250 $ i = {0,3} [size = nrow+1 = 1+1] 4251 $ j = {0,1,2} [size = 3] 4252 $ v = {4,5,6} [size = 3] 4253 4254 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4255 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4256 @*/ 4257 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4258 { 4259 PetscErrorCode ierr; 4260 4261 PetscFunctionBegin; 4262 if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4263 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4264 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4265 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4266 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4267 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4268 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4269 PetscFunctionReturn(0); 4270 } 4271 4272 /*@ 4273 MatUpdateMPIAIJWithArrays - updates a MPI AIJ matrix using arrays that contain in standard 4274 CSR format for the local rows. Only the numerical values are updated the other arrays must be identical 4275 4276 Collective 4277 4278 Input Parameters: 4279 + mat - the matrix 4280 . m - number of local rows (Cannot be PETSC_DECIDE) 4281 . n - This value should be the same as the local size used in creating the 4282 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4283 calculated if N is given) For square matrices n is almost always m. 4284 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4285 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4286 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4287 . J - column indices 4288 - v - matrix values 4289 4290 Level: intermediate 4291 4292 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4293 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4294 @*/ 4295 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 4296 { 4297 PetscErrorCode ierr; 4298 PetscInt cstart,nnz,i,j; 4299 PetscInt *ld; 4300 PetscBool nooffprocentries; 4301 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)mat->data; 4302 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)Aij->A->data, *Ao = (Mat_SeqAIJ*)Aij->B->data; 4303 PetscScalar *ad = Ad->a, *ao = Ao->a; 4304 const PetscInt *Adi = Ad->i; 4305 PetscInt ldi,Iii,md; 4306 4307 PetscFunctionBegin; 4308 if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4309 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4310 if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4311 if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4312 4313 cstart = mat->cmap->rstart; 4314 if (!Aij->ld) { 4315 /* count number of entries below block diagonal */ 4316 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 4317 Aij->ld = ld; 4318 for (i=0; i<m; i++) { 4319 nnz = Ii[i+1]- Ii[i]; 4320 j = 0; 4321 while (J[j] < cstart && j < nnz) {j++;} 4322 J += nnz; 4323 ld[i] = j; 4324 } 4325 } else { 4326 ld = Aij->ld; 4327 } 4328 4329 for (i=0; i<m; i++) { 4330 nnz = Ii[i+1]- Ii[i]; 4331 Iii = Ii[i]; 4332 ldi = ld[i]; 4333 md = Adi[i+1]-Adi[i]; 4334 ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr); 4335 ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr); 4336 ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr); 4337 ad += md; 4338 ao += nnz - md; 4339 } 4340 nooffprocentries = mat->nooffprocentries; 4341 mat->nooffprocentries = PETSC_TRUE; 4342 ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr); 4343 ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr); 4344 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 4345 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4346 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4347 mat->nooffprocentries = nooffprocentries; 4348 PetscFunctionReturn(0); 4349 } 4350 4351 /*@C 4352 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4353 (the default parallel PETSc format). For good matrix assembly performance 4354 the user should preallocate the matrix storage by setting the parameters 4355 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4356 performance can be increased by more than a factor of 50. 4357 4358 Collective 4359 4360 Input Parameters: 4361 + comm - MPI communicator 4362 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4363 This value should be the same as the local size used in creating the 4364 y vector for the matrix-vector product y = Ax. 4365 . n - This value should be the same as the local size used in creating the 4366 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4367 calculated if N is given) For square matrices n is almost always m. 4368 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4369 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4370 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4371 (same value is used for all local rows) 4372 . d_nnz - array containing the number of nonzeros in the various rows of the 4373 DIAGONAL portion of the local submatrix (possibly different for each row) 4374 or NULL, if d_nz is used to specify the nonzero structure. 4375 The size of this array is equal to the number of local rows, i.e 'm'. 4376 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4377 submatrix (same value is used for all local rows). 4378 - o_nnz - array containing the number of nonzeros in the various rows of the 4379 OFF-DIAGONAL portion of the local submatrix (possibly different for 4380 each row) or NULL, if o_nz is used to specify the nonzero 4381 structure. The size of this array is equal to the number 4382 of local rows, i.e 'm'. 4383 4384 Output Parameter: 4385 . A - the matrix 4386 4387 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4388 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4389 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4390 4391 Notes: 4392 If the *_nnz parameter is given then the *_nz parameter is ignored 4393 4394 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4395 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4396 storage requirements for this matrix. 4397 4398 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4399 processor than it must be used on all processors that share the object for 4400 that argument. 4401 4402 The user MUST specify either the local or global matrix dimensions 4403 (possibly both). 4404 4405 The parallel matrix is partitioned across processors such that the 4406 first m0 rows belong to process 0, the next m1 rows belong to 4407 process 1, the next m2 rows belong to process 2 etc.. where 4408 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4409 values corresponding to [m x N] submatrix. 4410 4411 The columns are logically partitioned with the n0 columns belonging 4412 to 0th partition, the next n1 columns belonging to the next 4413 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4414 4415 The DIAGONAL portion of the local submatrix on any given processor 4416 is the submatrix corresponding to the rows and columns m,n 4417 corresponding to the given processor. i.e diagonal matrix on 4418 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4419 etc. The remaining portion of the local submatrix [m x (N-n)] 4420 constitute the OFF-DIAGONAL portion. The example below better 4421 illustrates this concept. 4422 4423 For a square global matrix we define each processor's diagonal portion 4424 to be its local rows and the corresponding columns (a square submatrix); 4425 each processor's off-diagonal portion encompasses the remainder of the 4426 local matrix (a rectangular submatrix). 4427 4428 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4429 4430 When calling this routine with a single process communicator, a matrix of 4431 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4432 type of communicator, use the construction mechanism 4433 .vb 4434 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4435 .ve 4436 4437 $ MatCreate(...,&A); 4438 $ MatSetType(A,MATMPIAIJ); 4439 $ MatSetSizes(A, m,n,M,N); 4440 $ MatMPIAIJSetPreallocation(A,...); 4441 4442 By default, this format uses inodes (identical nodes) when possible. 4443 We search for consecutive rows with the same nonzero structure, thereby 4444 reusing matrix information to achieve increased efficiency. 4445 4446 Options Database Keys: 4447 + -mat_no_inode - Do not use inodes 4448 - -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4449 4450 4451 4452 Example usage: 4453 4454 Consider the following 8x8 matrix with 34 non-zero values, that is 4455 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4456 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4457 as follows 4458 4459 .vb 4460 1 2 0 | 0 3 0 | 0 4 4461 Proc0 0 5 6 | 7 0 0 | 8 0 4462 9 0 10 | 11 0 0 | 12 0 4463 ------------------------------------- 4464 13 0 14 | 15 16 17 | 0 0 4465 Proc1 0 18 0 | 19 20 21 | 0 0 4466 0 0 0 | 22 23 0 | 24 0 4467 ------------------------------------- 4468 Proc2 25 26 27 | 0 0 28 | 29 0 4469 30 0 0 | 31 32 33 | 0 34 4470 .ve 4471 4472 This can be represented as a collection of submatrices as 4473 4474 .vb 4475 A B C 4476 D E F 4477 G H I 4478 .ve 4479 4480 Where the submatrices A,B,C are owned by proc0, D,E,F are 4481 owned by proc1, G,H,I are owned by proc2. 4482 4483 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4484 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4485 The 'M','N' parameters are 8,8, and have the same values on all procs. 4486 4487 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4488 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4489 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4490 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4491 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4492 matrix, ans [DF] as another SeqAIJ matrix. 4493 4494 When d_nz, o_nz parameters are specified, d_nz storage elements are 4495 allocated for every row of the local diagonal submatrix, and o_nz 4496 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4497 One way to choose d_nz and o_nz is to use the max nonzerors per local 4498 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4499 In this case, the values of d_nz,o_nz are 4500 .vb 4501 proc0 : dnz = 2, o_nz = 2 4502 proc1 : dnz = 3, o_nz = 2 4503 proc2 : dnz = 1, o_nz = 4 4504 .ve 4505 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4506 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4507 for proc3. i.e we are using 12+15+10=37 storage locations to store 4508 34 values. 4509 4510 When d_nnz, o_nnz parameters are specified, the storage is specified 4511 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4512 In the above case the values for d_nnz,o_nnz are 4513 .vb 4514 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4515 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4516 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4517 .ve 4518 Here the space allocated is sum of all the above values i.e 34, and 4519 hence pre-allocation is perfect. 4520 4521 Level: intermediate 4522 4523 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4524 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4525 @*/ 4526 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4527 { 4528 PetscErrorCode ierr; 4529 PetscMPIInt size; 4530 4531 PetscFunctionBegin; 4532 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4533 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4534 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4535 if (size > 1) { 4536 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4537 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4538 } else { 4539 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4540 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4541 } 4542 PetscFunctionReturn(0); 4543 } 4544 4545 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4546 { 4547 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4548 PetscBool flg; 4549 PetscErrorCode ierr; 4550 4551 PetscFunctionBegin; 4552 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr); 4553 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4554 if (Ad) *Ad = a->A; 4555 if (Ao) *Ao = a->B; 4556 if (colmap) *colmap = a->garray; 4557 PetscFunctionReturn(0); 4558 } 4559 4560 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4561 { 4562 PetscErrorCode ierr; 4563 PetscInt m,N,i,rstart,nnz,Ii; 4564 PetscInt *indx; 4565 PetscScalar *values; 4566 4567 PetscFunctionBegin; 4568 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4569 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4570 PetscInt *dnz,*onz,sum,bs,cbs; 4571 4572 if (n == PETSC_DECIDE) { 4573 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4574 } 4575 /* Check sum(n) = N */ 4576 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4577 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4578 4579 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4580 rstart -= m; 4581 4582 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4583 for (i=0; i<m; i++) { 4584 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4585 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4586 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4587 } 4588 4589 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4590 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4591 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4592 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4593 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4594 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4595 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4596 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4597 } 4598 4599 /* numeric phase */ 4600 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4601 for (i=0; i<m; i++) { 4602 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4603 Ii = i + rstart; 4604 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4605 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4606 } 4607 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4608 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4609 PetscFunctionReturn(0); 4610 } 4611 4612 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4613 { 4614 PetscErrorCode ierr; 4615 PetscMPIInt rank; 4616 PetscInt m,N,i,rstart,nnz; 4617 size_t len; 4618 const PetscInt *indx; 4619 PetscViewer out; 4620 char *name; 4621 Mat B; 4622 const PetscScalar *values; 4623 4624 PetscFunctionBegin; 4625 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4626 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4627 /* Should this be the type of the diagonal block of A? */ 4628 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4629 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4630 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4631 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4632 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4633 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4634 for (i=0; i<m; i++) { 4635 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4636 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4637 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4638 } 4639 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4640 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4641 4642 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4643 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4644 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 4645 sprintf(name,"%s.%d",outfile,rank); 4646 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4647 ierr = PetscFree(name);CHKERRQ(ierr); 4648 ierr = MatView(B,out);CHKERRQ(ierr); 4649 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4650 ierr = MatDestroy(&B);CHKERRQ(ierr); 4651 PetscFunctionReturn(0); 4652 } 4653 4654 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4655 { 4656 PetscErrorCode ierr; 4657 Mat_Merge_SeqsToMPI *merge; 4658 PetscContainer container; 4659 4660 PetscFunctionBegin; 4661 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4662 if (container) { 4663 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4664 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4665 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4666 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4667 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4668 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4669 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4670 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4671 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4672 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4673 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4674 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4675 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4676 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4677 ierr = PetscFree(merge);CHKERRQ(ierr); 4678 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4679 } 4680 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4681 PetscFunctionReturn(0); 4682 } 4683 4684 #include <../src/mat/utils/freespace.h> 4685 #include <petscbt.h> 4686 4687 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4688 { 4689 PetscErrorCode ierr; 4690 MPI_Comm comm; 4691 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4692 PetscMPIInt size,rank,taga,*len_s; 4693 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4694 PetscInt proc,m; 4695 PetscInt **buf_ri,**buf_rj; 4696 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4697 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4698 MPI_Request *s_waits,*r_waits; 4699 MPI_Status *status; 4700 MatScalar *aa=a->a; 4701 MatScalar **abuf_r,*ba_i; 4702 Mat_Merge_SeqsToMPI *merge; 4703 PetscContainer container; 4704 4705 PetscFunctionBegin; 4706 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4707 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4708 4709 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4710 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4711 4712 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4713 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4714 4715 bi = merge->bi; 4716 bj = merge->bj; 4717 buf_ri = merge->buf_ri; 4718 buf_rj = merge->buf_rj; 4719 4720 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4721 owners = merge->rowmap->range; 4722 len_s = merge->len_s; 4723 4724 /* send and recv matrix values */ 4725 /*-----------------------------*/ 4726 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4727 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4728 4729 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4730 for (proc=0,k=0; proc<size; proc++) { 4731 if (!len_s[proc]) continue; 4732 i = owners[proc]; 4733 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4734 k++; 4735 } 4736 4737 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4738 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4739 ierr = PetscFree(status);CHKERRQ(ierr); 4740 4741 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4742 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4743 4744 /* insert mat values of mpimat */ 4745 /*----------------------------*/ 4746 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4747 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4748 4749 for (k=0; k<merge->nrecv; k++) { 4750 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4751 nrows = *(buf_ri_k[k]); 4752 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4753 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4754 } 4755 4756 /* set values of ba */ 4757 m = merge->rowmap->n; 4758 for (i=0; i<m; i++) { 4759 arow = owners[rank] + i; 4760 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4761 bnzi = bi[i+1] - bi[i]; 4762 ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr); 4763 4764 /* add local non-zero vals of this proc's seqmat into ba */ 4765 anzi = ai[arow+1] - ai[arow]; 4766 aj = a->j + ai[arow]; 4767 aa = a->a + ai[arow]; 4768 nextaj = 0; 4769 for (j=0; nextaj<anzi; j++) { 4770 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4771 ba_i[j] += aa[nextaj++]; 4772 } 4773 } 4774 4775 /* add received vals into ba */ 4776 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4777 /* i-th row */ 4778 if (i == *nextrow[k]) { 4779 anzi = *(nextai[k]+1) - *nextai[k]; 4780 aj = buf_rj[k] + *(nextai[k]); 4781 aa = abuf_r[k] + *(nextai[k]); 4782 nextaj = 0; 4783 for (j=0; nextaj<anzi; j++) { 4784 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4785 ba_i[j] += aa[nextaj++]; 4786 } 4787 } 4788 nextrow[k]++; nextai[k]++; 4789 } 4790 } 4791 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4792 } 4793 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4794 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4795 4796 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4797 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4798 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4799 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4800 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4801 PetscFunctionReturn(0); 4802 } 4803 4804 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4805 { 4806 PetscErrorCode ierr; 4807 Mat B_mpi; 4808 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4809 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4810 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4811 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4812 PetscInt len,proc,*dnz,*onz,bs,cbs; 4813 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4814 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4815 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4816 MPI_Status *status; 4817 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4818 PetscBT lnkbt; 4819 Mat_Merge_SeqsToMPI *merge; 4820 PetscContainer container; 4821 4822 PetscFunctionBegin; 4823 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4824 4825 /* make sure it is a PETSc comm */ 4826 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4827 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4828 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4829 4830 ierr = PetscNew(&merge);CHKERRQ(ierr); 4831 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4832 4833 /* determine row ownership */ 4834 /*---------------------------------------------------------*/ 4835 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4836 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4837 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4838 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4839 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4840 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4841 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4842 4843 m = merge->rowmap->n; 4844 owners = merge->rowmap->range; 4845 4846 /* determine the number of messages to send, their lengths */ 4847 /*---------------------------------------------------------*/ 4848 len_s = merge->len_s; 4849 4850 len = 0; /* length of buf_si[] */ 4851 merge->nsend = 0; 4852 for (proc=0; proc<size; proc++) { 4853 len_si[proc] = 0; 4854 if (proc == rank) { 4855 len_s[proc] = 0; 4856 } else { 4857 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4858 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4859 } 4860 if (len_s[proc]) { 4861 merge->nsend++; 4862 nrows = 0; 4863 for (i=owners[proc]; i<owners[proc+1]; i++) { 4864 if (ai[i+1] > ai[i]) nrows++; 4865 } 4866 len_si[proc] = 2*(nrows+1); 4867 len += len_si[proc]; 4868 } 4869 } 4870 4871 /* determine the number and length of messages to receive for ij-structure */ 4872 /*-------------------------------------------------------------------------*/ 4873 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4874 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4875 4876 /* post the Irecv of j-structure */ 4877 /*-------------------------------*/ 4878 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4879 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4880 4881 /* post the Isend of j-structure */ 4882 /*--------------------------------*/ 4883 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4884 4885 for (proc=0, k=0; proc<size; proc++) { 4886 if (!len_s[proc]) continue; 4887 i = owners[proc]; 4888 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4889 k++; 4890 } 4891 4892 /* receives and sends of j-structure are complete */ 4893 /*------------------------------------------------*/ 4894 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4895 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4896 4897 /* send and recv i-structure */ 4898 /*---------------------------*/ 4899 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4900 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4901 4902 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4903 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4904 for (proc=0,k=0; proc<size; proc++) { 4905 if (!len_s[proc]) continue; 4906 /* form outgoing message for i-structure: 4907 buf_si[0]: nrows to be sent 4908 [1:nrows]: row index (global) 4909 [nrows+1:2*nrows+1]: i-structure index 4910 */ 4911 /*-------------------------------------------*/ 4912 nrows = len_si[proc]/2 - 1; 4913 buf_si_i = buf_si + nrows+1; 4914 buf_si[0] = nrows; 4915 buf_si_i[0] = 0; 4916 nrows = 0; 4917 for (i=owners[proc]; i<owners[proc+1]; i++) { 4918 anzi = ai[i+1] - ai[i]; 4919 if (anzi) { 4920 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4921 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4922 nrows++; 4923 } 4924 } 4925 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4926 k++; 4927 buf_si += len_si[proc]; 4928 } 4929 4930 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4931 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4932 4933 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4934 for (i=0; i<merge->nrecv; i++) { 4935 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4936 } 4937 4938 ierr = PetscFree(len_si);CHKERRQ(ierr); 4939 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4940 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4941 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4942 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4943 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4944 ierr = PetscFree(status);CHKERRQ(ierr); 4945 4946 /* compute a local seq matrix in each processor */ 4947 /*----------------------------------------------*/ 4948 /* allocate bi array and free space for accumulating nonzero column info */ 4949 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4950 bi[0] = 0; 4951 4952 /* create and initialize a linked list */ 4953 nlnk = N+1; 4954 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4955 4956 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4957 len = ai[owners[rank+1]] - ai[owners[rank]]; 4958 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4959 4960 current_space = free_space; 4961 4962 /* determine symbolic info for each local row */ 4963 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4964 4965 for (k=0; k<merge->nrecv; k++) { 4966 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4967 nrows = *buf_ri_k[k]; 4968 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4969 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4970 } 4971 4972 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4973 len = 0; 4974 for (i=0; i<m; i++) { 4975 bnzi = 0; 4976 /* add local non-zero cols of this proc's seqmat into lnk */ 4977 arow = owners[rank] + i; 4978 anzi = ai[arow+1] - ai[arow]; 4979 aj = a->j + ai[arow]; 4980 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4981 bnzi += nlnk; 4982 /* add received col data into lnk */ 4983 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4984 if (i == *nextrow[k]) { /* i-th row */ 4985 anzi = *(nextai[k]+1) - *nextai[k]; 4986 aj = buf_rj[k] + *nextai[k]; 4987 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4988 bnzi += nlnk; 4989 nextrow[k]++; nextai[k]++; 4990 } 4991 } 4992 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4993 4994 /* if free space is not available, make more free space */ 4995 if (current_space->local_remaining<bnzi) { 4996 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 4997 nspacedouble++; 4998 } 4999 /* copy data into free space, then initialize lnk */ 5000 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 5001 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 5002 5003 current_space->array += bnzi; 5004 current_space->local_used += bnzi; 5005 current_space->local_remaining -= bnzi; 5006 5007 bi[i+1] = bi[i] + bnzi; 5008 } 5009 5010 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 5011 5012 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 5013 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 5014 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 5015 5016 /* create symbolic parallel matrix B_mpi */ 5017 /*---------------------------------------*/ 5018 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 5019 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 5020 if (n==PETSC_DECIDE) { 5021 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 5022 } else { 5023 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5024 } 5025 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 5026 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 5027 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 5028 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 5029 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5030 5031 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5032 B_mpi->assembled = PETSC_FALSE; 5033 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 5034 merge->bi = bi; 5035 merge->bj = bj; 5036 merge->buf_ri = buf_ri; 5037 merge->buf_rj = buf_rj; 5038 merge->coi = NULL; 5039 merge->coj = NULL; 5040 merge->owners_co = NULL; 5041 5042 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 5043 5044 /* attach the supporting struct to B_mpi for reuse */ 5045 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 5046 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 5047 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 5048 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 5049 *mpimat = B_mpi; 5050 5051 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 5052 PetscFunctionReturn(0); 5053 } 5054 5055 /*@C 5056 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 5057 matrices from each processor 5058 5059 Collective 5060 5061 Input Parameters: 5062 + comm - the communicators the parallel matrix will live on 5063 . seqmat - the input sequential matrices 5064 . m - number of local rows (or PETSC_DECIDE) 5065 . n - number of local columns (or PETSC_DECIDE) 5066 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5067 5068 Output Parameter: 5069 . mpimat - the parallel matrix generated 5070 5071 Level: advanced 5072 5073 Notes: 5074 The dimensions of the sequential matrix in each processor MUST be the same. 5075 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5076 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 5077 @*/ 5078 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 5079 { 5080 PetscErrorCode ierr; 5081 PetscMPIInt size; 5082 5083 PetscFunctionBegin; 5084 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5085 if (size == 1) { 5086 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5087 if (scall == MAT_INITIAL_MATRIX) { 5088 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 5089 } else { 5090 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5091 } 5092 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5093 PetscFunctionReturn(0); 5094 } 5095 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5096 if (scall == MAT_INITIAL_MATRIX) { 5097 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 5098 } 5099 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 5100 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5101 PetscFunctionReturn(0); 5102 } 5103 5104 /*@ 5105 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5106 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 5107 with MatGetSize() 5108 5109 Not Collective 5110 5111 Input Parameters: 5112 + A - the matrix 5113 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5114 5115 Output Parameter: 5116 . A_loc - the local sequential matrix generated 5117 5118 Level: developer 5119 5120 Notes: 5121 When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A. 5122 If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called. 5123 This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely 5124 modify the values of the returned A_loc. 5125 5126 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed() 5127 5128 @*/ 5129 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5130 { 5131 PetscErrorCode ierr; 5132 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5133 Mat_SeqAIJ *mat,*a,*b; 5134 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 5135 MatScalar *aa,*ba,*cam; 5136 PetscScalar *ca; 5137 PetscMPIInt size; 5138 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5139 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5140 PetscBool match; 5141 5142 PetscFunctionBegin; 5143 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr); 5144 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5145 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 5146 if (size == 1) { 5147 if (scall == MAT_INITIAL_MATRIX) { 5148 ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr); 5149 *A_loc = mpimat->A; 5150 } else if (scall == MAT_REUSE_MATRIX) { 5151 ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5152 } 5153 PetscFunctionReturn(0); 5154 } 5155 5156 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5157 a = (Mat_SeqAIJ*)(mpimat->A)->data; 5158 b = (Mat_SeqAIJ*)(mpimat->B)->data; 5159 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 5160 aa = a->a; ba = b->a; 5161 if (scall == MAT_INITIAL_MATRIX) { 5162 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 5163 ci[0] = 0; 5164 for (i=0; i<am; i++) { 5165 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5166 } 5167 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 5168 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 5169 k = 0; 5170 for (i=0; i<am; i++) { 5171 ncols_o = bi[i+1] - bi[i]; 5172 ncols_d = ai[i+1] - ai[i]; 5173 /* off-diagonal portion of A */ 5174 for (jo=0; jo<ncols_o; jo++) { 5175 col = cmap[*bj]; 5176 if (col >= cstart) break; 5177 cj[k] = col; bj++; 5178 ca[k++] = *ba++; 5179 } 5180 /* diagonal portion of A */ 5181 for (j=0; j<ncols_d; j++) { 5182 cj[k] = cstart + *aj++; 5183 ca[k++] = *aa++; 5184 } 5185 /* off-diagonal portion of A */ 5186 for (j=jo; j<ncols_o; j++) { 5187 cj[k] = cmap[*bj++]; 5188 ca[k++] = *ba++; 5189 } 5190 } 5191 /* put together the new matrix */ 5192 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5193 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5194 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5195 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5196 mat->free_a = PETSC_TRUE; 5197 mat->free_ij = PETSC_TRUE; 5198 mat->nonew = 0; 5199 } else if (scall == MAT_REUSE_MATRIX) { 5200 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5201 ci = mat->i; cj = mat->j; cam = mat->a; 5202 for (i=0; i<am; i++) { 5203 /* off-diagonal portion of A */ 5204 ncols_o = bi[i+1] - bi[i]; 5205 for (jo=0; jo<ncols_o; jo++) { 5206 col = cmap[*bj]; 5207 if (col >= cstart) break; 5208 *cam++ = *ba++; bj++; 5209 } 5210 /* diagonal portion of A */ 5211 ncols_d = ai[i+1] - ai[i]; 5212 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5213 /* off-diagonal portion of A */ 5214 for (j=jo; j<ncols_o; j++) { 5215 *cam++ = *ba++; bj++; 5216 } 5217 } 5218 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5219 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5220 PetscFunctionReturn(0); 5221 } 5222 5223 /*@C 5224 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5225 5226 Not Collective 5227 5228 Input Parameters: 5229 + A - the matrix 5230 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5231 - row, col - index sets of rows and columns to extract (or NULL) 5232 5233 Output Parameter: 5234 . A_loc - the local sequential matrix generated 5235 5236 Level: developer 5237 5238 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5239 5240 @*/ 5241 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5242 { 5243 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5244 PetscErrorCode ierr; 5245 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5246 IS isrowa,iscola; 5247 Mat *aloc; 5248 PetscBool match; 5249 5250 PetscFunctionBegin; 5251 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5252 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5253 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5254 if (!row) { 5255 start = A->rmap->rstart; end = A->rmap->rend; 5256 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5257 } else { 5258 isrowa = *row; 5259 } 5260 if (!col) { 5261 start = A->cmap->rstart; 5262 cmap = a->garray; 5263 nzA = a->A->cmap->n; 5264 nzB = a->B->cmap->n; 5265 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5266 ncols = 0; 5267 for (i=0; i<nzB; i++) { 5268 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5269 else break; 5270 } 5271 imark = i; 5272 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5273 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5274 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5275 } else { 5276 iscola = *col; 5277 } 5278 if (scall != MAT_INITIAL_MATRIX) { 5279 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5280 aloc[0] = *A_loc; 5281 } 5282 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5283 if (!col) { /* attach global id of condensed columns */ 5284 ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr); 5285 } 5286 *A_loc = aloc[0]; 5287 ierr = PetscFree(aloc);CHKERRQ(ierr); 5288 if (!row) { 5289 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5290 } 5291 if (!col) { 5292 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5293 } 5294 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5295 PetscFunctionReturn(0); 5296 } 5297 5298 /* 5299 * Destroy a mat that may be composed with PetscSF communication objects. 5300 * The SF objects were created in MatCreateSeqSubMatrixWithRows_Private. 5301 * */ 5302 PetscErrorCode MatDestroy_SeqAIJ_PetscSF(Mat mat) 5303 { 5304 PetscSF sf,osf; 5305 IS map; 5306 PetscErrorCode ierr; 5307 5308 PetscFunctionBegin; 5309 ierr = PetscObjectQuery((PetscObject)mat,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr); 5310 ierr = PetscObjectQuery((PetscObject)mat,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr); 5311 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5312 ierr = PetscSFDestroy(&osf);CHKERRQ(ierr); 5313 ierr = PetscObjectQuery((PetscObject)mat,"aoffdiagtopothmapping",(PetscObject*)&map);CHKERRQ(ierr); 5314 ierr = ISDestroy(&map);CHKERRQ(ierr); 5315 ierr = MatDestroy_SeqAIJ(mat);CHKERRQ(ierr); 5316 PetscFunctionReturn(0); 5317 } 5318 5319 /* 5320 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5321 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5322 * on a global size. 5323 * */ 5324 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth) 5325 { 5326 Mat_MPIAIJ *p=(Mat_MPIAIJ*)P->data; 5327 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth; 5328 PetscInt plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol; 5329 PetscMPIInt owner; 5330 PetscSFNode *iremote,*oiremote; 5331 const PetscInt *lrowindices; 5332 PetscErrorCode ierr; 5333 PetscSF sf,osf; 5334 PetscInt pcstart,*roffsets,*loffsets,*pnnz,j; 5335 PetscInt ontotalcols,dntotalcols,ntotalcols,nout; 5336 MPI_Comm comm; 5337 ISLocalToGlobalMapping mapping; 5338 5339 PetscFunctionBegin; 5340 ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr); 5341 /* plocalsize is the number of roots 5342 * nrows is the number of leaves 5343 * */ 5344 ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr); 5345 ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr); 5346 ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr); 5347 ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr); 5348 for (i=0;i<nrows;i++) { 5349 /* Find a remote index and an owner for a row 5350 * The row could be local or remote 5351 * */ 5352 owner = 0; 5353 lidx = 0; 5354 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr); 5355 iremote[i].index = lidx; 5356 iremote[i].rank = owner; 5357 } 5358 /* Create SF to communicate how many nonzero columns for each row */ 5359 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5360 /* SF will figure out the number of nonzero colunms for each row, and their 5361 * offsets 5362 * */ 5363 ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5364 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5365 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5366 5367 ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr); 5368 ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr); 5369 ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr); 5370 roffsets[0] = 0; 5371 roffsets[1] = 0; 5372 for (i=0;i<plocalsize;i++) { 5373 /* diag */ 5374 nrcols[i*2+0] = pd->i[i+1] - pd->i[i]; 5375 /* off diag */ 5376 nrcols[i*2+1] = po->i[i+1] - po->i[i]; 5377 /* compute offsets so that we relative location for each row */ 5378 roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0]; 5379 roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1]; 5380 } 5381 ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr); 5382 ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr); 5383 /* 'r' means root, and 'l' means leaf */ 5384 ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr); 5385 ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr); 5386 ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr); 5387 ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr); 5388 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5389 ierr = PetscFree(roffsets);CHKERRQ(ierr); 5390 ierr = PetscFree(nrcols);CHKERRQ(ierr); 5391 dntotalcols = 0; 5392 ontotalcols = 0; 5393 ncol = 0; 5394 for (i=0;i<nrows;i++) { 5395 pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1]; 5396 ncol = PetscMax(pnnz[i],ncol); 5397 /* diag */ 5398 dntotalcols += nlcols[i*2+0]; 5399 /* off diag */ 5400 ontotalcols += nlcols[i*2+1]; 5401 } 5402 /* We do not need to figure the right number of columns 5403 * since all the calculations will be done by going through the raw data 5404 * */ 5405 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr); 5406 ierr = MatSetUp(*P_oth);CHKERRQ(ierr); 5407 ierr = PetscFree(pnnz);CHKERRQ(ierr); 5408 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5409 /* diag */ 5410 ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr); 5411 /* off diag */ 5412 ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr); 5413 /* diag */ 5414 ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr); 5415 /* off diag */ 5416 ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr); 5417 dntotalcols = 0; 5418 ontotalcols = 0; 5419 ntotalcols = 0; 5420 for (i=0;i<nrows;i++) { 5421 owner = 0; 5422 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr); 5423 /* Set iremote for diag matrix */ 5424 for (j=0;j<nlcols[i*2+0];j++) { 5425 iremote[dntotalcols].index = loffsets[i*2+0] + j; 5426 iremote[dntotalcols].rank = owner; 5427 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5428 ilocal[dntotalcols++] = ntotalcols++; 5429 } 5430 /* off diag */ 5431 for (j=0;j<nlcols[i*2+1];j++) { 5432 oiremote[ontotalcols].index = loffsets[i*2+1] + j; 5433 oiremote[ontotalcols].rank = owner; 5434 oilocal[ontotalcols++] = ntotalcols++; 5435 } 5436 } 5437 ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr); 5438 ierr = PetscFree(loffsets);CHKERRQ(ierr); 5439 ierr = PetscFree(nlcols);CHKERRQ(ierr); 5440 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5441 /* P serves as roots and P_oth is leaves 5442 * Diag matrix 5443 * */ 5444 ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5445 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5446 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5447 5448 ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr); 5449 /* Off diag */ 5450 ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5451 ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr); 5452 ierr = PetscSFSetUp(osf);CHKERRQ(ierr); 5453 /* We operate on the matrix internal data for saving memory */ 5454 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5455 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5456 ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr); 5457 /* Convert to global indices for diag matrix */ 5458 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart; 5459 ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr); 5460 /* We want P_oth store global indices */ 5461 ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr); 5462 /* Use memory scalable approach */ 5463 ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr); 5464 ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr); 5465 ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr); 5466 ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr); 5467 /* Convert back to local indices */ 5468 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart; 5469 ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr); 5470 nout = 0; 5471 ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr); 5472 if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal to nout %D \n",po->i[plocalsize],nout); 5473 ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr); 5474 /* Exchange values */ 5475 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5476 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5477 /* Stop PETSc from shrinking memory */ 5478 for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i]; 5479 ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5480 ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5481 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5482 ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr); 5483 ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr); 5484 /* ``New MatDestroy" takes care of PetscSF objects as well */ 5485 (*P_oth)->ops->destroy = MatDestroy_SeqAIJ_PetscSF; 5486 PetscFunctionReturn(0); 5487 } 5488 5489 /* 5490 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5491 * This supports MPIAIJ and MAIJ 5492 * */ 5493 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth) 5494 { 5495 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data; 5496 Mat_SeqAIJ *p_oth; 5497 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data; 5498 IS rows,map; 5499 PetscHMapI hamp; 5500 PetscInt i,htsize,*rowindices,off,*mapping,key,count; 5501 MPI_Comm comm; 5502 PetscSF sf,osf; 5503 PetscBool has; 5504 PetscErrorCode ierr; 5505 5506 PetscFunctionBegin; 5507 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5508 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5509 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5510 * and then create a submatrix (that often is an overlapping matrix) 5511 * */ 5512 if (reuse==MAT_INITIAL_MATRIX) { 5513 /* Use a hash table to figure out unique keys */ 5514 ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr); 5515 ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr); 5516 ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr); 5517 count = 0; 5518 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5519 for (i=0;i<a->B->cmap->n;i++) { 5520 key = a->garray[i]/dof; 5521 ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr); 5522 if (!has) { 5523 mapping[i] = count; 5524 ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr); 5525 } else { 5526 /* Current 'i' has the same value the previous step */ 5527 mapping[i] = count-1; 5528 } 5529 } 5530 ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr); 5531 ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr); 5532 if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count);CHKERRQ(ierr); 5533 ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr); 5534 off = 0; 5535 ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr); 5536 ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr); 5537 ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr); 5538 ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr); 5539 /* In case, the matrix was already created but users want to recreate the matrix */ 5540 ierr = MatDestroy(P_oth);CHKERRQ(ierr); 5541 ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr); 5542 ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr); 5543 ierr = ISDestroy(&rows);CHKERRQ(ierr); 5544 } else if (reuse==MAT_REUSE_MATRIX) { 5545 /* If matrix was already created, we simply update values using SF objects 5546 * that as attached to the matrix ealier. 5547 * */ 5548 ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr); 5549 ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr); 5550 if (!sf || !osf) { 5551 SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet \n"); 5552 } 5553 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5554 /* Update values in place */ 5555 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5556 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5557 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5558 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5559 } else { 5560 SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type \n"); 5561 } 5562 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5563 PetscFunctionReturn(0); 5564 } 5565 5566 /*@C 5567 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5568 5569 Collective on Mat 5570 5571 Input Parameters: 5572 + A,B - the matrices in mpiaij format 5573 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5574 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5575 5576 Output Parameter: 5577 + rowb, colb - index sets of rows and columns of B to extract 5578 - B_seq - the sequential matrix generated 5579 5580 Level: developer 5581 5582 @*/ 5583 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5584 { 5585 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5586 PetscErrorCode ierr; 5587 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5588 IS isrowb,iscolb; 5589 Mat *bseq=NULL; 5590 5591 PetscFunctionBegin; 5592 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5593 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5594 } 5595 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5596 5597 if (scall == MAT_INITIAL_MATRIX) { 5598 start = A->cmap->rstart; 5599 cmap = a->garray; 5600 nzA = a->A->cmap->n; 5601 nzB = a->B->cmap->n; 5602 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5603 ncols = 0; 5604 for (i=0; i<nzB; i++) { /* row < local row index */ 5605 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5606 else break; 5607 } 5608 imark = i; 5609 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5610 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5611 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5612 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5613 } else { 5614 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5615 isrowb = *rowb; iscolb = *colb; 5616 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5617 bseq[0] = *B_seq; 5618 } 5619 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5620 *B_seq = bseq[0]; 5621 ierr = PetscFree(bseq);CHKERRQ(ierr); 5622 if (!rowb) { 5623 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5624 } else { 5625 *rowb = isrowb; 5626 } 5627 if (!colb) { 5628 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5629 } else { 5630 *colb = iscolb; 5631 } 5632 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5633 PetscFunctionReturn(0); 5634 } 5635 5636 /* 5637 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5638 of the OFF-DIAGONAL portion of local A 5639 5640 Collective on Mat 5641 5642 Input Parameters: 5643 + A,B - the matrices in mpiaij format 5644 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5645 5646 Output Parameter: 5647 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5648 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5649 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5650 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5651 5652 Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product 5653 for this matrix. This is not desirable.. 5654 5655 Level: developer 5656 5657 */ 5658 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5659 { 5660 PetscErrorCode ierr; 5661 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5662 Mat_SeqAIJ *b_oth; 5663 VecScatter ctx; 5664 MPI_Comm comm; 5665 const PetscMPIInt *rprocs,*sprocs; 5666 const PetscInt *srow,*rstarts,*sstarts; 5667 PetscInt *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5668 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = 0,*sstartsj,len; 5669 PetscScalar *b_otha,*bufa,*bufA,*vals = NULL; 5670 MPI_Request *rwaits = NULL,*swaits = NULL; 5671 MPI_Status rstatus; 5672 PetscMPIInt jj,size,tag,rank,nsends_mpi,nrecvs_mpi; 5673 5674 PetscFunctionBegin; 5675 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5676 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5677 5678 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5679 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5680 } 5681 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5682 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5683 5684 if (size == 1) { 5685 startsj_s = NULL; 5686 bufa_ptr = NULL; 5687 *B_oth = NULL; 5688 PetscFunctionReturn(0); 5689 } 5690 5691 ctx = a->Mvctx; 5692 tag = ((PetscObject)ctx)->tag; 5693 5694 if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use"); 5695 ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5696 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5697 ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr); 5698 ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr); 5699 ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr); 5700 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5701 5702 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5703 if (scall == MAT_INITIAL_MATRIX) { 5704 /* i-array */ 5705 /*---------*/ 5706 /* post receives */ 5707 if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */ 5708 for (i=0; i<nrecvs; i++) { 5709 rowlen = rvalues + rstarts[i]*rbs; 5710 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5711 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5712 } 5713 5714 /* pack the outgoing message */ 5715 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5716 5717 sstartsj[0] = 0; 5718 rstartsj[0] = 0; 5719 len = 0; /* total length of j or a array to be sent */ 5720 if (nsends) { 5721 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5722 ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr); 5723 } 5724 for (i=0; i<nsends; i++) { 5725 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5726 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5727 for (j=0; j<nrows; j++) { 5728 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5729 for (l=0; l<sbs; l++) { 5730 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5731 5732 rowlen[j*sbs+l] = ncols; 5733 5734 len += ncols; 5735 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5736 } 5737 k++; 5738 } 5739 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5740 5741 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5742 } 5743 /* recvs and sends of i-array are completed */ 5744 i = nrecvs; 5745 while (i--) { 5746 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5747 } 5748 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5749 ierr = PetscFree(svalues);CHKERRQ(ierr); 5750 5751 /* allocate buffers for sending j and a arrays */ 5752 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5753 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5754 5755 /* create i-array of B_oth */ 5756 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5757 5758 b_othi[0] = 0; 5759 len = 0; /* total length of j or a array to be received */ 5760 k = 0; 5761 for (i=0; i<nrecvs; i++) { 5762 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 5763 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 5764 for (j=0; j<nrows; j++) { 5765 b_othi[k+1] = b_othi[k] + rowlen[j]; 5766 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5767 k++; 5768 } 5769 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5770 } 5771 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5772 5773 /* allocate space for j and a arrrays of B_oth */ 5774 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5775 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5776 5777 /* j-array */ 5778 /*---------*/ 5779 /* post receives of j-array */ 5780 for (i=0; i<nrecvs; i++) { 5781 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5782 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5783 } 5784 5785 /* pack the outgoing message j-array */ 5786 if (nsends) k = sstarts[0]; 5787 for (i=0; i<nsends; i++) { 5788 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5789 bufJ = bufj+sstartsj[i]; 5790 for (j=0; j<nrows; j++) { 5791 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5792 for (ll=0; ll<sbs; ll++) { 5793 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5794 for (l=0; l<ncols; l++) { 5795 *bufJ++ = cols[l]; 5796 } 5797 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5798 } 5799 } 5800 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5801 } 5802 5803 /* recvs and sends of j-array are completed */ 5804 i = nrecvs; 5805 while (i--) { 5806 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5807 } 5808 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5809 } else if (scall == MAT_REUSE_MATRIX) { 5810 sstartsj = *startsj_s; 5811 rstartsj = *startsj_r; 5812 bufa = *bufa_ptr; 5813 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5814 b_otha = b_oth->a; 5815 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5816 5817 /* a-array */ 5818 /*---------*/ 5819 /* post receives of a-array */ 5820 for (i=0; i<nrecvs; i++) { 5821 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5822 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5823 } 5824 5825 /* pack the outgoing message a-array */ 5826 if (nsends) k = sstarts[0]; 5827 for (i=0; i<nsends; i++) { 5828 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5829 bufA = bufa+sstartsj[i]; 5830 for (j=0; j<nrows; j++) { 5831 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5832 for (ll=0; ll<sbs; ll++) { 5833 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5834 for (l=0; l<ncols; l++) { 5835 *bufA++ = vals[l]; 5836 } 5837 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5838 } 5839 } 5840 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5841 } 5842 /* recvs and sends of a-array are completed */ 5843 i = nrecvs; 5844 while (i--) { 5845 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5846 } 5847 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5848 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5849 5850 if (scall == MAT_INITIAL_MATRIX) { 5851 /* put together the new matrix */ 5852 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5853 5854 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5855 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5856 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5857 b_oth->free_a = PETSC_TRUE; 5858 b_oth->free_ij = PETSC_TRUE; 5859 b_oth->nonew = 0; 5860 5861 ierr = PetscFree(bufj);CHKERRQ(ierr); 5862 if (!startsj_s || !bufa_ptr) { 5863 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5864 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5865 } else { 5866 *startsj_s = sstartsj; 5867 *startsj_r = rstartsj; 5868 *bufa_ptr = bufa; 5869 } 5870 } 5871 5872 ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5873 ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr); 5874 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5875 PetscFunctionReturn(0); 5876 } 5877 5878 /*@C 5879 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5880 5881 Not Collective 5882 5883 Input Parameters: 5884 . A - The matrix in mpiaij format 5885 5886 Output Parameter: 5887 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5888 . colmap - A map from global column index to local index into lvec 5889 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5890 5891 Level: developer 5892 5893 @*/ 5894 #if defined(PETSC_USE_CTABLE) 5895 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5896 #else 5897 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5898 #endif 5899 { 5900 Mat_MPIAIJ *a; 5901 5902 PetscFunctionBegin; 5903 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5904 PetscValidPointer(lvec, 2); 5905 PetscValidPointer(colmap, 3); 5906 PetscValidPointer(multScatter, 4); 5907 a = (Mat_MPIAIJ*) A->data; 5908 if (lvec) *lvec = a->lvec; 5909 if (colmap) *colmap = a->colmap; 5910 if (multScatter) *multScatter = a->Mvctx; 5911 PetscFunctionReturn(0); 5912 } 5913 5914 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5915 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5916 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*); 5917 #if defined(PETSC_HAVE_MKL_SPARSE) 5918 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5919 #endif 5920 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*); 5921 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5922 #if defined(PETSC_HAVE_ELEMENTAL) 5923 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5924 #endif 5925 #if defined(PETSC_HAVE_HYPRE) 5926 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5927 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*); 5928 #endif 5929 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 5930 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5931 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*); 5932 5933 /* 5934 Computes (B'*A')' since computing B*A directly is untenable 5935 5936 n p p 5937 ( ) ( ) ( ) 5938 m ( A ) * n ( B ) = m ( C ) 5939 ( ) ( ) ( ) 5940 5941 */ 5942 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5943 { 5944 PetscErrorCode ierr; 5945 Mat At,Bt,Ct; 5946 5947 PetscFunctionBegin; 5948 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5949 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5950 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5951 ierr = MatDestroy(&At);CHKERRQ(ierr); 5952 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5953 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5954 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5955 PetscFunctionReturn(0); 5956 } 5957 5958 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5959 { 5960 PetscErrorCode ierr; 5961 PetscInt m=A->rmap->n,n=B->cmap->n; 5962 Mat Cmat; 5963 5964 PetscFunctionBegin; 5965 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5966 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5967 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5968 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5969 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5970 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5971 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5972 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5973 5974 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5975 5976 *C = Cmat; 5977 PetscFunctionReturn(0); 5978 } 5979 5980 /* ----------------------------------------------------------------*/ 5981 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5982 { 5983 PetscErrorCode ierr; 5984 5985 PetscFunctionBegin; 5986 if (scall == MAT_INITIAL_MATRIX) { 5987 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5988 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5989 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5990 } 5991 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5992 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5993 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5994 PetscFunctionReturn(0); 5995 } 5996 5997 /*MC 5998 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5999 6000 Options Database Keys: 6001 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 6002 6003 Level: beginner 6004 6005 Notes: 6006 MatSetValues() may be called for this matrix type with a NULL argument for the numerical values, 6007 in this case the values associated with the rows and columns one passes in are set to zero 6008 in the matrix 6009 6010 MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this no 6011 space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored 6012 6013 .seealso: MatCreateAIJ() 6014 M*/ 6015 6016 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6017 { 6018 Mat_MPIAIJ *b; 6019 PetscErrorCode ierr; 6020 PetscMPIInt size; 6021 6022 PetscFunctionBegin; 6023 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 6024 6025 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 6026 B->data = (void*)b; 6027 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 6028 B->assembled = PETSC_FALSE; 6029 B->insertmode = NOT_SET_VALUES; 6030 b->size = size; 6031 6032 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 6033 6034 /* build cache for off array entries formed */ 6035 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 6036 6037 b->donotstash = PETSC_FALSE; 6038 b->colmap = 0; 6039 b->garray = 0; 6040 b->roworiented = PETSC_TRUE; 6041 6042 /* stuff used for matrix vector multiply */ 6043 b->lvec = NULL; 6044 b->Mvctx = NULL; 6045 6046 /* stuff for MatGetRow() */ 6047 b->rowindices = 0; 6048 b->rowvalues = 0; 6049 b->getrowactive = PETSC_FALSE; 6050 6051 /* flexible pointer used in CUSP/CUSPARSE classes */ 6052 b->spptr = NULL; 6053 6054 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 6055 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 6056 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 6057 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 6058 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 6059 ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr); 6060 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 6061 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 6062 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 6063 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr); 6064 #if defined(PETSC_HAVE_MKL_SPARSE) 6065 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr); 6066 #endif 6067 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 6068 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr); 6069 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 6070 #if defined(PETSC_HAVE_ELEMENTAL) 6071 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 6072 #endif 6073 #if defined(PETSC_HAVE_HYPRE) 6074 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 6075 #endif 6076 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 6077 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr); 6078 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 6079 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 6080 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 6081 #if defined(PETSC_HAVE_HYPRE) 6082 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr); 6083 #endif 6084 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr); 6085 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 6086 PetscFunctionReturn(0); 6087 } 6088 6089 /*@C 6090 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 6091 and "off-diagonal" part of the matrix in CSR format. 6092 6093 Collective 6094 6095 Input Parameters: 6096 + comm - MPI communicator 6097 . m - number of local rows (Cannot be PETSC_DECIDE) 6098 . n - This value should be the same as the local size used in creating the 6099 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 6100 calculated if N is given) For square matrices n is almost always m. 6101 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 6102 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 6103 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6104 . j - column indices 6105 . a - matrix values 6106 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6107 . oj - column indices 6108 - oa - matrix values 6109 6110 Output Parameter: 6111 . mat - the matrix 6112 6113 Level: advanced 6114 6115 Notes: 6116 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 6117 must free the arrays once the matrix has been destroyed and not before. 6118 6119 The i and j indices are 0 based 6120 6121 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 6122 6123 This sets local rows and cannot be used to set off-processor values. 6124 6125 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6126 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6127 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 6128 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6129 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 6130 communication if it is known that only local entries will be set. 6131 6132 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 6133 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 6134 @*/ 6135 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 6136 { 6137 PetscErrorCode ierr; 6138 Mat_MPIAIJ *maij; 6139 6140 PetscFunctionBegin; 6141 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6142 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 6143 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 6144 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 6145 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 6146 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 6147 maij = (Mat_MPIAIJ*) (*mat)->data; 6148 6149 (*mat)->preallocated = PETSC_TRUE; 6150 6151 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 6152 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 6153 6154 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 6155 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 6156 6157 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6158 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6159 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6160 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6161 6162 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 6163 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6164 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6165 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 6166 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 6167 PetscFunctionReturn(0); 6168 } 6169 6170 /* 6171 Special version for direct calls from Fortran 6172 */ 6173 #include <petsc/private/fortranimpl.h> 6174 6175 /* Change these macros so can be used in void function */ 6176 #undef CHKERRQ 6177 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 6178 #undef SETERRQ2 6179 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 6180 #undef SETERRQ3 6181 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 6182 #undef SETERRQ 6183 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 6184 6185 #if defined(PETSC_HAVE_FORTRAN_CAPS) 6186 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 6187 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 6188 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 6189 #else 6190 #endif 6191 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 6192 { 6193 Mat mat = *mmat; 6194 PetscInt m = *mm, n = *mn; 6195 InsertMode addv = *maddv; 6196 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 6197 PetscScalar value; 6198 PetscErrorCode ierr; 6199 6200 MatCheckPreallocated(mat,1); 6201 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 6202 6203 #if defined(PETSC_USE_DEBUG) 6204 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 6205 #endif 6206 { 6207 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 6208 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 6209 PetscBool roworiented = aij->roworiented; 6210 6211 /* Some Variables required in the macro */ 6212 Mat A = aij->A; 6213 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 6214 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 6215 MatScalar *aa = a->a; 6216 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 6217 Mat B = aij->B; 6218 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 6219 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 6220 MatScalar *ba = b->a; 6221 6222 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 6223 PetscInt nonew = a->nonew; 6224 MatScalar *ap1,*ap2; 6225 6226 PetscFunctionBegin; 6227 for (i=0; i<m; i++) { 6228 if (im[i] < 0) continue; 6229 #if defined(PETSC_USE_DEBUG) 6230 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 6231 #endif 6232 if (im[i] >= rstart && im[i] < rend) { 6233 row = im[i] - rstart; 6234 lastcol1 = -1; 6235 rp1 = aj + ai[row]; 6236 ap1 = aa + ai[row]; 6237 rmax1 = aimax[row]; 6238 nrow1 = ailen[row]; 6239 low1 = 0; 6240 high1 = nrow1; 6241 lastcol2 = -1; 6242 rp2 = bj + bi[row]; 6243 ap2 = ba + bi[row]; 6244 rmax2 = bimax[row]; 6245 nrow2 = bilen[row]; 6246 low2 = 0; 6247 high2 = nrow2; 6248 6249 for (j=0; j<n; j++) { 6250 if (roworiented) value = v[i*n+j]; 6251 else value = v[i+j*m]; 6252 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 6253 if (in[j] >= cstart && in[j] < cend) { 6254 col = in[j] - cstart; 6255 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 6256 } else if (in[j] < 0) continue; 6257 #if defined(PETSC_USE_DEBUG) 6258 /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */ 6259 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 6260 #endif 6261 else { 6262 if (mat->was_assembled) { 6263 if (!aij->colmap) { 6264 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 6265 } 6266 #if defined(PETSC_USE_CTABLE) 6267 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 6268 col--; 6269 #else 6270 col = aij->colmap[in[j]] - 1; 6271 #endif 6272 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 6273 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 6274 col = in[j]; 6275 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 6276 B = aij->B; 6277 b = (Mat_SeqAIJ*)B->data; 6278 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 6279 rp2 = bj + bi[row]; 6280 ap2 = ba + bi[row]; 6281 rmax2 = bimax[row]; 6282 nrow2 = bilen[row]; 6283 low2 = 0; 6284 high2 = nrow2; 6285 bm = aij->B->rmap->n; 6286 ba = b->a; 6287 } 6288 } else col = in[j]; 6289 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 6290 } 6291 } 6292 } else if (!aij->donotstash) { 6293 if (roworiented) { 6294 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 6295 } else { 6296 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 6297 } 6298 } 6299 } 6300 } 6301 PetscFunctionReturnVoid(); 6302 } 6303