#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/vecscatterimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

  Developer Notes:
    Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL; the type also automatically switches over to use inodes when
    enough exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) {
    ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr);
  }
  if (a->B) {
    ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt = 0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*) Y->data;
  PetscBool      cong;

  PetscFunctionBegin;
  ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
  if (Y->assembled && cong) {
    ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
  } else {
    ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }

  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  IS             sis,gis;
  PetscErrorCode ierr;
  const PetscInt *isis,*igis;
  PetscInt       n,*iis,nsis,ngis,rstart,i;

  PetscFunctionBegin;
  ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
  ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
  ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
  ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
  ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);

  ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
  ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
  ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
  n    = ngis + nsis;
  ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<n; i++) iis[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);

  ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
  ierr = ISDestroy(&sis);CHKERRQ(ierr);
  ierr = ISDestroy(&gis);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices

    Used by a preconditioner, hence PETSC_EXTERN
*/
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
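        /* gmata->ilen[] holds the number of stored nonzeros in each row of the sequential
           matrix, so rank i is sent the lengths of exactly its owned rows
           [rowners[i], rowners[i+1]) */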
      }
      /* determine the number of diagonal and off-diagonal nonzeros */
      ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal nonzeros */
      ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else { /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
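    /*
       Each local row arrives as one contiguous run of values laid out in global column order:
       [off-diagonal entries left of the diagonal block | diagonal-block entries | off-diagonal entries right of it].
       ld[i] records how many entries of row i lie to the left of the diagonal block, which is what
       lets the loop below peel each run apart into the Ao (off-diagonal) and Ad (diagonal) value arrays.
    */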
    /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it, it is not scalable (each process
  has an order-N integer array) but is fast to access.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \
{ \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure whether PetscLogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } \
        else ap1[_i] = value; \
        inserted = PETSC_TRUE; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
    ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++;\
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
{ \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } \
        else ap2[_i] = value; \
        inserted = PETSC_TRUE; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\
    ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}

PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value  = 0.0;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A     = aij->A;
  Mat_SeqAIJ *a    = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa   = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B     = aij->B;
  Mat_SeqAIJ *b    = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba   = b->a;
  /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
   * cannot use "#if defined" inside a macro. */
  PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
    if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) value = roworiented ? v[i*n+j] : v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
          if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
        } else if (in[j] < 0) continue;
        else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
              inserted = PETSC_FALSE;
            } else if (col < 0) {
              if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
                ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
              } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
            }
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
          if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

/*
    This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij   = (Mat_MPIAIJ*)mat->data;
  Mat        A      = aij->A; /* diagonal part of the matrix */
  Mat        B      = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ *a     = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b     = (Mat_SeqAIJ*)B->data;
  PetscInt   cstart = mat->cmap->rstart,cend = mat->cmap->rend,col;
  PetscInt   *ailen = a->ilen,*aj = a->j;
  PetscInt   *bilen = b->ilen,*bj = b->j;
  PetscInt   am     = aij->A->rmap->n,j;
  PetscInt   diag_so_far = 0,dnz;
  PetscInt   offd_so_far = 0,onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(0);
}

/*
    This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ.
    Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
    would not be true and the more complex MatSetValues_MPIAIJ has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
{
  Mat_MPIAIJ  *aij   = (Mat_MPIAIJ*)mat->data;
  Mat         A      = aij->A; /* diagonal part of the matrix */
  Mat         B      = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ  *aijd  = (Mat_SeqAIJ*)(aij->A)->data,*aijo = (Mat_SeqAIJ*)(aij->B)->data;
  Mat_SeqAIJ  *a     = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ  *b     = (Mat_SeqAIJ*)B->data;
  PetscInt    cstart = mat->cmap->rstart,cend = mat->cmap->rend;
  PetscInt    *ailen = a->ilen,*aj = a->j;
  PetscInt    *bilen = b->ilen,*bj = b->j;
  PetscInt    am     = aij->A->rmap->n,j;
  PetscInt    *full_diag_i = aijd->i,*full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
  PetscInt    col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
  PetscScalar *aa = a->a,*ba = b->a;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz_row = onz_row = 0;
    rowstart_offd = full_offd_i[j];
    rowstart_diag = full_diag_i[j];
    /* Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
        aa[rowstart_diag+dnz_row] = mat_a[col];
        dnz_row++;
      } else { /* off-diagonal entries */
        bj[rowstart_offd+onz_row] = mat_j[col];
        ba[rowstart_offd+onz_row] = mat_a[col];
        onz_row++;
      }
    }
    ailen[j] = dnz_row;
    bilen[j] = onz_row;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
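  /* other_disassembled is the MPI_PROD of was_assembled over all ranks (see the allreduce below):
     it stays PETSC_TRUE only if no rank has disassembled its off-diagonal block */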
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr);
  }
#endif
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
      aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */
#endif
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = 0;

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;

  /* if no new nonzero locations are allowed in the matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ       *mat = (Mat_MPIAIJ *) A->data;
  PetscObjectState sA, sB;
  PetscInt         *lrows;
  PetscInt         r, len;
  PetscBool        cong, lch, gch;
  PetscErrorCode   ierr;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
  ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }

  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   nnwA, nnwB;
    PetscBool  nnzA, nnzB;

    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
      aijA->nonew = 0;
    }
    if (!nnzB) {
      ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue;
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* reduce nonzerostate */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
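  /* Overview of the approach used below: a PetscSF maps the requested global rows to their
     owning ranks so each rank collects its locally owned rows in lrows[]; those rows and
     columns are zeroed in the diagonal block directly, while a 0/1 mask of the zeroed rows
     is scattered into the off-process column space to find which columns of the off-diagonal
     block must be cleared (and, when x and b are supplied, how to fix up b) */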
PetscErrorCode ierr; 990 PetscMPIInt n = A->rmap->n; 991 PetscInt i,j,r,m,len = 0; 992 PetscInt *lrows,*owners = A->rmap->range; 993 PetscMPIInt p = 0; 994 PetscSFNode *rrows; 995 PetscSF sf; 996 const PetscScalar *xx; 997 PetscScalar *bb,*mask; 998 Vec xmask,lmask; 999 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 1000 const PetscInt *aj, *ii,*ridx; 1001 PetscScalar *aa; 1002 1003 PetscFunctionBegin; 1004 /* Create SF where leaves are input rows and roots are owned rows */ 1005 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 1006 for (r = 0; r < n; ++r) lrows[r] = -1; 1007 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 1008 for (r = 0; r < N; ++r) { 1009 const PetscInt idx = rows[r]; 1010 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 1011 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1012 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 1013 } 1014 rrows[r].rank = p; 1015 rrows[r].index = rows[r] - owners[p]; 1016 } 1017 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 1018 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 1019 /* Collect flags for rows to be zeroed */ 1020 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1021 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1022 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1023 /* Compress and put in row numbers */ 1024 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 1025 /* zero diagonal part of matrix */ 1026 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 1027 /* handle off diagonal part of matrix */ 1028 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 1029 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1030 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1031 for (i=0; i<len; i++) bb[lrows[i]] = 1; 1032 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1033 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1034 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1035 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1036 if (x && b) { /* this code is buggy when the row and column layout don't match */ 1037 PetscBool cong; 1038 1039 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 1040 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 1041 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1042 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1043 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1044 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1045 } 1046 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1047 /* remove zeroed rows of off diagonal matrix */ 1048 ii = aij->i; 1049 for (i=0; i<len; i++) { 1050 ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr); 1051 } 1052 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1053 if (aij->compressedrow.use) { 1054 m = aij->compressedrow.nrows; 1055 ii = aij->compressedrow.i; 1056 ridx = aij->compressedrow.rindex; 1057 for (i=0; i<m; i++) { 1058 n = ii[i+1] - ii[i]; 1059 aj = aij->j + ii[i]; 1060 aa = aij->a + ii[i]; 1061 1062 for (j=0; j<n; j++) { 1063 if (PetscAbsScalar(mask[*aj])) { 1064 if (b) bb[*ridx] -= 
*aa*xx[*aj]; 1065 *aa = 0.0; 1066 } 1067 aa++; 1068 aj++; 1069 } 1070 ridx++; 1071 } 1072 } else { /* do not use compressed row format */ 1073 m = l->B->rmap->n; 1074 for (i=0; i<m; i++) { 1075 n = ii[i+1] - ii[i]; 1076 aj = aij->j + ii[i]; 1077 aa = aij->a + ii[i]; 1078 for (j=0; j<n; j++) { 1079 if (PetscAbsScalar(mask[*aj])) { 1080 if (b) bb[i] -= *aa*xx[*aj]; 1081 *aa = 0.0; 1082 } 1083 aa++; 1084 aj++; 1085 } 1086 } 1087 } 1088 if (x && b) { 1089 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1090 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1091 } 1092 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1093 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1094 ierr = PetscFree(lrows);CHKERRQ(ierr); 1095 1096 /* only change matrix nonzero state if pattern was allowed to be changed */ 1097 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 1098 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1099 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1100 } 1101 PetscFunctionReturn(0); 1102 } 1103 1104 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1105 { 1106 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1107 PetscErrorCode ierr; 1108 PetscInt nt; 1109 VecScatter Mvctx = a->Mvctx; 1110 1111 PetscFunctionBegin; 1112 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1113 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1114 1115 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1116 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1117 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1118 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1119 PetscFunctionReturn(0); 1120 } 1121 1122 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1123 { 1124 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1125 PetscErrorCode ierr; 1126 1127 PetscFunctionBegin; 1128 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1129 PetscFunctionReturn(0); 1130 } 1131 1132 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1133 { 1134 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1135 PetscErrorCode ierr; 1136 VecScatter Mvctx = a->Mvctx; 1137 1138 PetscFunctionBegin; 1139 if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1; 1140 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1141 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1142 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1143 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1144 PetscFunctionReturn(0); 1145 } 1146 1147 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1148 { 1149 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1150 PetscErrorCode ierr; 1151 1152 PetscFunctionBegin; 1153 /* do nondiagonal part */ 1154 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1155 /* do local part */ 1156 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1157 /* add partial results together */ 1158 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1159 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1160 PetscFunctionReturn(0); 1161 } 1162 1163 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1164 { 1165 MPI_Comm comm; 1166 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1167 Mat Adia = 
Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1168 IS Me,Notme; 1169 PetscErrorCode ierr; 1170 PetscInt M,N,first,last,*notme,i; 1171 PetscBool lf; 1172 PetscMPIInt size; 1173 1174 PetscFunctionBegin; 1175 /* Easy test: symmetric diagonal block */ 1176 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1177 ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr); 1178 ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr); 1179 if (!*f) PetscFunctionReturn(0); 1180 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1181 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1182 if (size == 1) PetscFunctionReturn(0); 1183 1184 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1185 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1186 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1187 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1188 for (i=0; i<first; i++) notme[i] = i; 1189 for (i=last; i<M; i++) notme[i-last+first] = i; 1190 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1191 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1192 ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1193 Aoff = Aoffs[0]; 1194 ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1195 Boff = Boffs[0]; 1196 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1197 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1198 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1199 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1200 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1201 ierr = PetscFree(notme);CHKERRQ(ierr); 1202 PetscFunctionReturn(0); 1203 } 1204 1205 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1206 { 1207 PetscErrorCode ierr; 1208 1209 PetscFunctionBegin; 1210 ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr); 1211 PetscFunctionReturn(0); 1212 } 1213 1214 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1215 { 1216 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1217 PetscErrorCode ierr; 1218 1219 PetscFunctionBegin; 1220 /* do nondiagonal part */ 1221 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1222 /* do local part */ 1223 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1224 /* add partial results together */ 1225 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1226 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1227 PetscFunctionReturn(0); 1228 } 1229 1230 /* 1231 This only works correctly for square matrices where the subblock A->A is the 1232 diagonal block 1233 */ 1234 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1235 { 1236 PetscErrorCode ierr; 1237 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1238 1239 PetscFunctionBegin; 1240 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1241 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1242 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1243 PetscFunctionReturn(0); 1244 } 1245 1246 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1247 { 1248 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1249 PetscErrorCode ierr; 1250 1251 PetscFunctionBegin; 1252 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1253 ierr = 
MatScale(a->B,aa);CHKERRQ(ierr); 1254 PetscFunctionReturn(0); 1255 } 1256 1257 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1258 { 1259 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1260 PetscErrorCode ierr; 1261 1262 PetscFunctionBegin; 1263 #if defined(PETSC_USE_LOG) 1264 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1265 #endif 1266 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1267 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1268 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1269 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1270 #if defined(PETSC_USE_CTABLE) 1271 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1272 #else 1273 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1274 #endif 1275 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1276 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1277 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1278 if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);} 1279 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1280 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1281 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1282 1283 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1284 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1285 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1286 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1287 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1288 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr); 1289 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1290 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1291 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr); 1292 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1293 #if defined(PETSC_HAVE_ELEMENTAL) 1294 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1295 #endif 1296 #if defined(PETSC_HAVE_HYPRE) 1297 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1298 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1299 #endif 1300 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1301 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr); 1302 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1303 PetscFunctionReturn(0); 1304 } 1305 1306 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1307 { 1308 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1309 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1310 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1311 const PetscInt *garray = aij->garray; 1312 PetscInt header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb; 1313 PetscInt *rowlens; 1314 PetscInt *colidxs; 1315 PetscScalar *matvals; 1316 PetscErrorCode ierr; 1317 1318 PetscFunctionBegin; 1319 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 1320 1321 M = mat->rmap->N; 1322 N = mat->cmap->N; 1323 m = mat->rmap->n; 1324 rs 
= mat->rmap->rstart; 1325 cs = mat->cmap->rstart; 1326 nz = A->nz + B->nz; 1327 1328 /* write matrix header */ 1329 header[0] = MAT_FILE_CLASSID; 1330 header[1] = M; header[2] = N; header[3] = nz; 1331 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1332 ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr); 1333 1334 /* fill in and store row lengths */ 1335 ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr); 1336 for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1337 ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr); 1338 ierr = PetscFree(rowlens);CHKERRQ(ierr); 1339 1340 /* fill in and store column indices */ 1341 ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr); 1342 for (cnt=0, i=0; i<m; i++) { 1343 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1344 if (garray[B->j[jb]] > cs) break; 1345 colidxs[cnt++] = garray[B->j[jb]]; 1346 } 1347 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1348 colidxs[cnt++] = A->j[ja] + cs; 1349 for (; jb<B->i[i+1]; jb++) 1350 colidxs[cnt++] = garray[B->j[jb]]; 1351 } 1352 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1353 ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 1354 ierr = PetscFree(colidxs);CHKERRQ(ierr); 1355 1356 /* fill in and store nonzero values */ 1357 ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr); 1358 for (cnt=0, i=0; i<m; i++) { 1359 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1360 if (garray[B->j[jb]] > cs) break; 1361 matvals[cnt++] = B->a[jb]; 1362 } 1363 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1364 matvals[cnt++] = A->a[ja]; 1365 for (; jb<B->i[i+1]; jb++) 1366 matvals[cnt++] = B->a[jb]; 1367 } 1368 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1369 ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 1370 ierr = PetscFree(matvals);CHKERRQ(ierr); 1371 1372 /* write block size option to the viewer's .info file */ 1373 ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 1374 PetscFunctionReturn(0); 1375 } 1376 1377 #include <petscdraw.h> 1378 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1379 { 1380 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1381 PetscErrorCode ierr; 1382 PetscMPIInt rank = aij->rank,size = aij->size; 1383 PetscBool isdraw,iascii,isbinary; 1384 PetscViewer sviewer; 1385 PetscViewerFormat format; 1386 1387 PetscFunctionBegin; 1388 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1389 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1390 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1391 if (iascii) { 1392 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1393 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1394 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1395 ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr); 1396 ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1397 for (i=0; i<(PetscInt)size; i++) { 1398 nmax = PetscMax(nmax,nz[i]); 1399 nmin = PetscMin(nmin,nz[i]); 1400 navg += nz[i]; 1401 } 1402 ierr = PetscFree(nz);CHKERRQ(ierr); 1403 navg = navg/size; 1404 ierr = 
PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr); 1405 PetscFunctionReturn(0); 1406 } 1407 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1408 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1409 MatInfo info; 1410 PetscBool inodes; 1411 1412 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1413 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1414 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1415 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1416 if (!inodes) { 1417 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n", 1418 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1419 } else { 1420 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n", 1421 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1422 } 1423 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1424 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1425 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1426 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1427 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1428 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1429 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1430 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1431 PetscFunctionReturn(0); 1432 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1433 PetscInt inodecount,inodelimit,*inodes; 1434 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1435 if (inodes) { 1436 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1437 } else { 1438 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1439 } 1440 PetscFunctionReturn(0); 1441 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1442 PetscFunctionReturn(0); 1443 } 1444 } else if (isbinary) { 1445 if (size == 1) { 1446 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1447 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1448 } else { 1449 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1450 } 1451 PetscFunctionReturn(0); 1452 } else if (iascii && size == 1) { 1453 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1454 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1455 PetscFunctionReturn(0); 1456 } else if (isdraw) { 1457 PetscDraw draw; 1458 PetscBool isnull; 1459 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1460 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1461 if (isnull) PetscFunctionReturn(0); 1462 } 1463 1464 { /* assemble the entire matrix onto first processor */ 1465 Mat A = NULL, Av; 1466 IS isrow,iscol; 1467 1468 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1469 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? 
mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1470 ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 1471 ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr); 1472 /* The commented code uses MatCreateSubMatrices instead */ 1473 /* 1474 Mat *AA, A = NULL, Av; 1475 IS isrow,iscol; 1476 1477 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1478 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1479 ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr); 1480 if (!rank) { 1481 ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr); 1482 A = AA[0]; 1483 Av = AA[0]; 1484 } 1485 ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr); 1486 */ 1487 ierr = ISDestroy(&iscol);CHKERRQ(ierr); 1488 ierr = ISDestroy(&isrow);CHKERRQ(ierr); 1489 /* 1490 Everyone has to call to draw the matrix since the graphics waits are 1491 synchronized across all processors that share the PetscDraw object 1492 */ 1493 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1494 if (!rank) { 1495 if (((PetscObject)mat)->name) { 1496 ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr); 1497 } 1498 ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr); 1499 } 1500 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1501 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1502 ierr = MatDestroy(&A);CHKERRQ(ierr); 1503 } 1504 PetscFunctionReturn(0); 1505 } 1506 1507 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1508 { 1509 PetscErrorCode ierr; 1510 PetscBool iascii,isdraw,issocket,isbinary; 1511 1512 PetscFunctionBegin; 1513 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1514 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1515 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1516 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1517 if (iascii || isdraw || isbinary || issocket) { 1518 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1519 } 1520 PetscFunctionReturn(0); 1521 } 1522 1523 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1524 { 1525 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1526 PetscErrorCode ierr; 1527 Vec bb1 = 0; 1528 PetscBool hasop; 1529 1530 PetscFunctionBegin; 1531 if (flag == SOR_APPLY_UPPER) { 1532 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1533 PetscFunctionReturn(0); 1534 } 1535 1536 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1537 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1538 } 1539 1540 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1541 if (flag & SOR_ZERO_INITIAL_GUESS) { 1542 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1543 its--; 1544 } 1545 1546 while (its--) { 1547 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1548 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1549 1550 /* update rhs: bb1 = bb - B*x */ 1551 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1552 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1553 1554 
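      /*
         In effect this is a process-block Jacobi iteration: the ghost values of xx have just been
         gathered into mat->lvec and the off-diagonal contribution folded into the right-hand side
         (bb1 = bb - B*x), so the sweep below only has to relax the local diagonal block with
         symmetric SOR passes.
      */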
/* local sweep */ 1555 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1556 } 1557 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1558 if (flag & SOR_ZERO_INITIAL_GUESS) { 1559 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1560 its--; 1561 } 1562 while (its--) { 1563 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1564 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1565 1566 /* update rhs: bb1 = bb - B*x */ 1567 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1568 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1569 1570 /* local sweep */ 1571 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1572 } 1573 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1574 if (flag & SOR_ZERO_INITIAL_GUESS) { 1575 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1576 its--; 1577 } 1578 while (its--) { 1579 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1580 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1581 1582 /* update rhs: bb1 = bb - B*x */ 1583 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1584 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1585 1586 /* local sweep */ 1587 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1588 } 1589 } else if (flag & SOR_EISENSTAT) { 1590 Vec xx1; 1591 1592 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1593 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1594 1595 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1596 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1597 if (!mat->diag) { 1598 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1599 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1600 } 1601 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1602 if (hasop) { 1603 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1604 } else { 1605 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1606 } 1607 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1608 1609 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1610 1611 /* local sweep */ 1612 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1613 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1614 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1615 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1616 1617 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1618 1619 matin->factorerrortype = mat->A->factorerrortype; 1620 PetscFunctionReturn(0); 1621 } 1622 1623 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1624 { 1625 Mat aA,aB,Aperm; 1626 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1627 PetscScalar *aa,*ba; 1628 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1629 PetscSF rowsf,sf; 1630 IS parcolp = NULL; 1631 PetscBool done; 1632 PetscErrorCode ierr; 1633 1634 PetscFunctionBegin; 1635 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1636 ierr = 
ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1637 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1638 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1639 1640 /* Invert row permutation to find out where my rows should go */ 1641 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1642 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1643 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1644 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1645 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1646 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1647 1648 /* Invert column permutation to find out where my columns should go */ 1649 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1650 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1651 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1652 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1653 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1654 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1655 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1656 1657 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1658 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1659 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1660 1661 /* Find out where my gcols should go */ 1662 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1663 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1664 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1665 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1666 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1667 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1668 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1669 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1670 1671 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1672 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1673 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1674 for (i=0; i<m; i++) { 1675 PetscInt row = rdest[i]; 1676 PetscMPIInt rowner; 1677 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1678 for (j=ai[i]; j<ai[i+1]; j++) { 1679 PetscInt col = cdest[aj[j]]; 1680 PetscMPIInt cowner; 1681 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1682 if (rowner == cowner) dnnz[i]++; 1683 else onnz[i]++; 1684 } 1685 for (j=bi[i]; j<bi[i+1]; j++) { 1686 PetscInt col = gcdest[bj[j]]; 1687 PetscMPIInt cowner; 1688 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1689 if (rowner == cowner) dnnz[i]++; 1690 else onnz[i]++; 1691 } 1692 } 1693 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1694 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1695 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1696 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1697 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1698 1699 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1700 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1701 ierr = 
MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1702 for (i=0; i<m; i++) { 1703 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1704 PetscInt j0,rowlen; 1705 rowlen = ai[i+1] - ai[i]; 1706 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1707 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1708 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1709 } 1710 rowlen = bi[i+1] - bi[i]; 1711 for (j0=j=0; j<rowlen; j0=j) { 1712 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1713 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1714 } 1715 } 1716 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1717 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1718 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1719 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1720 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1721 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1722 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1723 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1724 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1725 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1726 *B = Aperm; 1727 PetscFunctionReturn(0); 1728 } 1729 1730 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1731 { 1732 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1733 PetscErrorCode ierr; 1734 1735 PetscFunctionBegin; 1736 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1737 if (ghosts) *ghosts = aij->garray; 1738 PetscFunctionReturn(0); 1739 } 1740 1741 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1742 { 1743 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1744 Mat A = mat->A,B = mat->B; 1745 PetscErrorCode ierr; 1746 PetscLogDouble isend[5],irecv[5]; 1747 1748 PetscFunctionBegin; 1749 info->block_size = 1.0; 1750 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1751 1752 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1753 isend[3] = info->memory; isend[4] = info->mallocs; 1754 1755 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1756 1757 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1758 isend[3] += info->memory; isend[4] += info->mallocs; 1759 if (flag == MAT_LOCAL) { 1760 info->nz_used = isend[0]; 1761 info->nz_allocated = isend[1]; 1762 info->nz_unneeded = isend[2]; 1763 info->memory = isend[3]; 1764 info->mallocs = isend[4]; 1765 } else if (flag == MAT_GLOBAL_MAX) { 1766 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1767 1768 info->nz_used = irecv[0]; 1769 info->nz_allocated = irecv[1]; 1770 info->nz_unneeded = irecv[2]; 1771 info->memory = irecv[3]; 1772 info->mallocs = irecv[4]; 1773 } else if (flag == MAT_GLOBAL_SUM) { 1774 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1775 1776 info->nz_used = irecv[0]; 1777 info->nz_allocated = irecv[1]; 1778 info->nz_unneeded = irecv[2]; 1779 info->memory = irecv[3]; 1780 info->mallocs = irecv[4]; 1781 } 1782 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1783 info->fill_ratio_needed = 0; 1784 info->factor_mallocs = 0; 1785 PetscFunctionReturn(0); 1786 } 
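
/*
   A minimal usage sketch for MatGetInfo() on an assembled MATMPIAIJ (the statistics assembled by
   MatGetInfo_MPIAIJ() above): MAT_LOCAL reports the sum of the diagonal and off-diagonal blocks
   on the calling rank, while MAT_GLOBAL_SUM reduces those numbers over the communicator.  The
   matrix A below is assumed to be an already assembled MATMPIAIJ; the variable names are
   illustrative only.

     MatInfo  info;
     PetscInt nz_local,nz_global;

     ierr = MatGetInfo(A,MAT_LOCAL,&info);CHKERRQ(ierr);
     nz_local  = (PetscInt)info.nz_used;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     nz_global = (PetscInt)info.nz_used;
*/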
1787 1788 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1789 { 1790 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1791 PetscErrorCode ierr; 1792 1793 PetscFunctionBegin; 1794 switch (op) { 1795 case MAT_NEW_NONZERO_LOCATIONS: 1796 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1797 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1798 case MAT_KEEP_NONZERO_PATTERN: 1799 case MAT_NEW_NONZERO_LOCATION_ERR: 1800 case MAT_IGNORE_ZERO_ENTRIES: 1801 MatCheckPreallocated(A,1); 1802 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1803 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1804 break; 1805 case MAT_USE_INODES: 1806 if (PetscUnlikely(!(A)->preallocated)) { 1807 a->inode_setoption = PETSC_TRUE; /* option will be set in MatMPIAIJSetPreallocation_MPIAIJ() */ 1808 a->inode_use = flg; 1809 } else { 1810 a->inode_setoption = PETSC_FALSE; 1811 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1812 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1813 } 1814 break; 1815 case MAT_ROW_ORIENTED: 1816 MatCheckPreallocated(A,1); 1817 a->roworiented = flg; 1818 1819 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1820 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1821 break; 1822 case MAT_NEW_DIAGONALS: 1823 case MAT_SORTED_FULL: 1824 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1825 break; 1826 case MAT_IGNORE_OFF_PROC_ENTRIES: 1827 a->donotstash = flg; 1828 break; 1829 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1830 case MAT_SPD: 1831 case MAT_SYMMETRIC: 1832 case MAT_STRUCTURALLY_SYMMETRIC: 1833 case MAT_HERMITIAN: 1834 case MAT_SYMMETRY_ETERNAL: 1835 break; 1836 case MAT_SUBMAT_SINGLEIS: 1837 A->submat_singleis = flg; 1838 break; 1839 case MAT_STRUCTURE_ONLY: 1840 /* The option is handled directly by MatSetOption() */ 1841 break; 1842 default: 1843 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1844 } 1845 PetscFunctionReturn(0); 1846 } 1847 1848 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1849 { 1850 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1851 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1852 PetscErrorCode ierr; 1853 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1854 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1855 PetscInt *cmap,*idx_p; 1856 1857 PetscFunctionBegin; 1858 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1859 mat->getrowactive = PETSC_TRUE; 1860 1861 if (!mat->rowvalues && (idx || v)) { 1862 /* 1863 allocate enough space to hold information from the longest row. 
1864 */ 1865 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1866 PetscInt max = 1,tmp; 1867 for (i=0; i<matin->rmap->n; i++) { 1868 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1869 if (max < tmp) max = tmp; 1870 } 1871 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1872 } 1873 1874 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1875 lrow = row - rstart; 1876 1877 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1878 if (!v) {pvA = 0; pvB = 0;} 1879 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1880 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1881 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1882 nztot = nzA + nzB; 1883 1884 cmap = mat->garray; 1885 if (v || idx) { 1886 if (nztot) { 1887 /* Sort by increasing column numbers, assuming A and B already sorted */ 1888 PetscInt imark = -1; 1889 if (v) { 1890 *v = v_p = mat->rowvalues; 1891 for (i=0; i<nzB; i++) { 1892 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1893 else break; 1894 } 1895 imark = i; 1896 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1897 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1898 } 1899 if (idx) { 1900 *idx = idx_p = mat->rowindices; 1901 if (imark > -1) { 1902 for (i=0; i<imark; i++) { 1903 idx_p[i] = cmap[cworkB[i]]; 1904 } 1905 } else { 1906 for (i=0; i<nzB; i++) { 1907 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1908 else break; 1909 } 1910 imark = i; 1911 } 1912 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1913 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1914 } 1915 } else { 1916 if (idx) *idx = 0; 1917 if (v) *v = 0; 1918 } 1919 } 1920 *nz = nztot; 1921 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1922 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1923 PetscFunctionReturn(0); 1924 } 1925 1926 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1927 { 1928 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1929 1930 PetscFunctionBegin; 1931 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1932 aij->getrowactive = PETSC_FALSE; 1933 PetscFunctionReturn(0); 1934 } 1935 1936 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1937 { 1938 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1939 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1940 PetscErrorCode ierr; 1941 PetscInt i,j,cstart = mat->cmap->rstart; 1942 PetscReal sum = 0.0; 1943 MatScalar *v; 1944 1945 PetscFunctionBegin; 1946 if (aij->size == 1) { 1947 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1948 } else { 1949 if (type == NORM_FROBENIUS) { 1950 v = amat->a; 1951 for (i=0; i<amat->nz; i++) { 1952 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1953 } 1954 v = bmat->a; 1955 for (i=0; i<bmat->nz; i++) { 1956 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1957 } 1958 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1959 *norm = PetscSqrtReal(*norm); 1960 ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr); 1961 } else if (type == NORM_1) { /* max column norm */ 1962 PetscReal *tmp,*tmp2; 1963 PetscInt *jj,*garray = aij->garray; 1964 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1965 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1966 *norm = 0.0; 
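      /*
         NORM_1 is the maximum absolute column sum: each rank accumulates |a_ij| of its diagonal
         block into the global column slots cstart + amat->j[k] and of its off-diagonal block into
         garray[bmat->j[k]], the Allreduce below then sums these per-column totals across ranks,
         and the final loop over tmp2 takes the largest one.
      */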
1967 v = amat->a; jj = amat->j; 1968 for (j=0; j<amat->nz; j++) { 1969 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1970 } 1971 v = bmat->a; jj = bmat->j; 1972 for (j=0; j<bmat->nz; j++) { 1973 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1974 } 1975 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1976 for (j=0; j<mat->cmap->N; j++) { 1977 if (tmp2[j] > *norm) *norm = tmp2[j]; 1978 } 1979 ierr = PetscFree(tmp);CHKERRQ(ierr); 1980 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1981 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1982 } else if (type == NORM_INFINITY) { /* max row norm */ 1983 PetscReal ntemp = 0.0; 1984 for (j=0; j<aij->A->rmap->n; j++) { 1985 v = amat->a + amat->i[j]; 1986 sum = 0.0; 1987 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1988 sum += PetscAbsScalar(*v); v++; 1989 } 1990 v = bmat->a + bmat->i[j]; 1991 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1992 sum += PetscAbsScalar(*v); v++; 1993 } 1994 if (sum > ntemp) ntemp = sum; 1995 } 1996 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1997 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1998 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1999 } 2000 PetscFunctionReturn(0); 2001 } 2002 2003 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2004 { 2005 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 2006 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 2007 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol; 2008 const PetscInt *ai,*aj,*bi,*bj,*B_diag_i; 2009 PetscErrorCode ierr; 2010 Mat B,A_diag,*B_diag; 2011 const MatScalar *array; 2012 2013 PetscFunctionBegin; 2014 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2015 ai = Aloc->i; aj = Aloc->j; 2016 bi = Bloc->i; bj = Bloc->j; 2017 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2018 PetscInt *d_nnz,*g_nnz,*o_nnz; 2019 PetscSFNode *oloc; 2020 PETSC_UNUSED PetscSF sf; 2021 2022 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 2023 /* compute d_nnz for preallocation */ 2024 ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr); 2025 for (i=0; i<ai[ma]; i++) { 2026 d_nnz[aj[i]]++; 2027 } 2028 /* compute local off-diagonal contributions */ 2029 ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr); 2030 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2031 /* map those to global */ 2032 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2033 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2034 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2035 ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr); 2036 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2037 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2038 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2039 2040 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2041 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2042 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 2043 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2044 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2045 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2046 } else { 2047 B = *matout; 2048 ierr = 
MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2049 } 2050 2051 b = (Mat_MPIAIJ*)B->data; 2052 A_diag = a->A; 2053 B_diag = &b->A; 2054 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 2055 A_diag_ncol = A_diag->cmap->N; 2056 B_diag_ilen = sub_B_diag->ilen; 2057 B_diag_i = sub_B_diag->i; 2058 2059 /* Set ilen for diagonal of B */ 2060 for (i=0; i<A_diag_ncol; i++) { 2061 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 2062 } 2063 2064 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 2065 very quickly (=without using MatSetValues), because all writes are local. */ 2066 ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr); 2067 2068 /* copy over the B part */ 2069 ierr = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr); 2070 array = Bloc->a; 2071 row = A->rmap->rstart; 2072 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2073 cols_tmp = cols; 2074 for (i=0; i<mb; i++) { 2075 ncol = bi[i+1]-bi[i]; 2076 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2077 row++; 2078 array += ncol; cols_tmp += ncol; 2079 } 2080 ierr = PetscFree(cols);CHKERRQ(ierr); 2081 2082 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2083 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2084 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 2085 *matout = B; 2086 } else { 2087 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2088 } 2089 PetscFunctionReturn(0); 2090 } 2091 2092 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2093 { 2094 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2095 Mat a = aij->A,b = aij->B; 2096 PetscErrorCode ierr; 2097 PetscInt s1,s2,s3; 2098 2099 PetscFunctionBegin; 2100 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2101 if (rr) { 2102 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2103 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2104 /* Overlap communication with computation. 
*/ 2105 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2106 } 2107 if (ll) { 2108 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2109 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2110 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2111 } 2112 /* scale the diagonal block */ 2113 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2114 2115 if (rr) { 2116 /* Do a scatter end and then right scale the off-diagonal block */ 2117 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2118 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2119 } 2120 PetscFunctionReturn(0); 2121 } 2122 2123 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2124 { 2125 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2126 PetscErrorCode ierr; 2127 2128 PetscFunctionBegin; 2129 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2130 PetscFunctionReturn(0); 2131 } 2132 2133 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2134 { 2135 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2136 Mat a,b,c,d; 2137 PetscBool flg; 2138 PetscErrorCode ierr; 2139 2140 PetscFunctionBegin; 2141 a = matA->A; b = matA->B; 2142 c = matB->A; d = matB->B; 2143 2144 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2145 if (flg) { 2146 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2147 } 2148 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2149 PetscFunctionReturn(0); 2150 } 2151 2152 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2153 { 2154 PetscErrorCode ierr; 2155 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2156 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2157 2158 PetscFunctionBegin; 2159 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2160 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2161 /* because of the column compression in the off-processor part of the matrix a->B, 2162 the number of columns in a->B and b->B may be different, hence we cannot call 2163 the MatCopy() directly on the two parts. If need be, we can provide a more 2164 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2165 then copying the submatrices */ 2166 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2167 } else { 2168 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2169 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2170 } 2171 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2172 PetscFunctionReturn(0); 2173 } 2174 2175 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2176 { 2177 PetscErrorCode ierr; 2178 2179 PetscFunctionBegin; 2180 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2181 PetscFunctionReturn(0); 2182 } 2183 2184 /* 2185 Computes the number of nonzeros per row needed for preallocation when X and Y 2186 have different nonzero structure. 
2187 */ 2188 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2189 { 2190 PetscInt i,j,k,nzx,nzy; 2191 2192 PetscFunctionBegin; 2193 /* Set the number of nonzeros in the new matrix */ 2194 for (i=0; i<m; i++) { 2195 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2196 nzx = xi[i+1] - xi[i]; 2197 nzy = yi[i+1] - yi[i]; 2198 nnz[i] = 0; 2199 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2200 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2201 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2202 nnz[i]++; 2203 } 2204 for (; k<nzy; k++) nnz[i]++; 2205 } 2206 PetscFunctionReturn(0); 2207 } 2208 2209 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2210 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2211 { 2212 PetscErrorCode ierr; 2213 PetscInt m = Y->rmap->N; 2214 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2215 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2216 2217 PetscFunctionBegin; 2218 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2219 PetscFunctionReturn(0); 2220 } 2221 2222 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2223 { 2224 PetscErrorCode ierr; 2225 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2226 PetscBLASInt bnz,one=1; 2227 Mat_SeqAIJ *x,*y; 2228 2229 PetscFunctionBegin; 2230 if (str == SAME_NONZERO_PATTERN) { 2231 PetscScalar alpha = a; 2232 x = (Mat_SeqAIJ*)xx->A->data; 2233 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2234 y = (Mat_SeqAIJ*)yy->A->data; 2235 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2236 x = (Mat_SeqAIJ*)xx->B->data; 2237 y = (Mat_SeqAIJ*)yy->B->data; 2238 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2239 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2240 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2241 /* the MatAXPY_Basic* subroutines calls MatAssembly, so the matrix on the GPU 2242 will be updated */ 2243 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 2244 if (Y->offloadmask != PETSC_OFFLOAD_UNALLOCATED) { 2245 Y->offloadmask = PETSC_OFFLOAD_CPU; 2246 } 2247 #endif 2248 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2249 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2250 } else { 2251 Mat B; 2252 PetscInt *nnz_d,*nnz_o; 2253 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2254 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2255 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2256 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2257 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2258 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2259 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2260 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2261 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2262 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2263 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2264 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2265 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 
ierr = PetscFree(nnz_o);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatConjugate_SeqAIJ(Mat);

PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
{
#if defined(PETSC_USE_COMPLEX)
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
  ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
#else
  PetscFunctionBegin;
#endif
  PetscFunctionReturn(0);
}

PetscErrorCode MatRealPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ*)
A->data;
  PetscInt       n      = A->rmap->n;
  PetscInt       cstart = A->cmap->rstart;
  PetscInt       *cmap  = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
  PetscInt       n      = A->rmap->n;
  PetscInt       cstart = A->cmap->rstart;
  PetscInt       *cmap  = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat            *dummy;

  PetscFunctionBegin;
  ierr = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
  *newmat = *dummy;
  ierr = PetscFree(dummy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr =
MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2483 A->factorerrortype = a->A->factorerrortype; 2484 PetscFunctionReturn(0); 2485 } 2486 2487 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2488 { 2489 PetscErrorCode ierr; 2490 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2491 2492 PetscFunctionBegin; 2493 if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2494 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2495 if (x->assembled) { 2496 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2497 } else { 2498 ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr); 2499 } 2500 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2501 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2502 PetscFunctionReturn(0); 2503 } 2504 2505 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2506 { 2507 PetscFunctionBegin; 2508 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2509 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2510 PetscFunctionReturn(0); 2511 } 2512 2513 /*@ 2514 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2515 2516 Collective on Mat 2517 2518 Input Parameters: 2519 + A - the matrix 2520 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2521 2522 Level: advanced 2523 2524 @*/ 2525 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2526 { 2527 PetscErrorCode ierr; 2528 2529 PetscFunctionBegin; 2530 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2531 PetscFunctionReturn(0); 2532 } 2533 2534 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2535 { 2536 PetscErrorCode ierr; 2537 PetscBool sc = PETSC_FALSE,flg; 2538 2539 PetscFunctionBegin; 2540 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2541 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2542 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2543 if (flg) { 2544 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2545 } 2546 ierr = PetscOptionsTail();CHKERRQ(ierr); 2547 PetscFunctionReturn(0); 2548 } 2549 2550 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2551 { 2552 PetscErrorCode ierr; 2553 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2554 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2555 2556 PetscFunctionBegin; 2557 if (!Y->preallocated) { 2558 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2559 } else if (!aij->nz) { 2560 PetscInt nonew = aij->nonew; 2561 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2562 aij->nonew = nonew; 2563 } 2564 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2565 PetscFunctionReturn(0); 2566 } 2567 2568 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2569 { 2570 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2571 PetscErrorCode ierr; 2572 2573 PetscFunctionBegin; 2574 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2575 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2576 if (d) { 2577 PetscInt rstart; 
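    /* the local (diagonal-block) index of the missing diagonal entry is shifted by this rank's
       ownership-range start below, so *d is reported as a global row number */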
2578 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2579 *d += rstart; 2580 2581 } 2582 PetscFunctionReturn(0); 2583 } 2584 2585 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag) 2586 { 2587 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2588 PetscErrorCode ierr; 2589 2590 PetscFunctionBegin; 2591 ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr); 2592 PetscFunctionReturn(0); 2593 } 2594 2595 /* -------------------------------------------------------------------*/ 2596 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2597 MatGetRow_MPIAIJ, 2598 MatRestoreRow_MPIAIJ, 2599 MatMult_MPIAIJ, 2600 /* 4*/ MatMultAdd_MPIAIJ, 2601 MatMultTranspose_MPIAIJ, 2602 MatMultTransposeAdd_MPIAIJ, 2603 0, 2604 0, 2605 0, 2606 /*10*/ 0, 2607 0, 2608 0, 2609 MatSOR_MPIAIJ, 2610 MatTranspose_MPIAIJ, 2611 /*15*/ MatGetInfo_MPIAIJ, 2612 MatEqual_MPIAIJ, 2613 MatGetDiagonal_MPIAIJ, 2614 MatDiagonalScale_MPIAIJ, 2615 MatNorm_MPIAIJ, 2616 /*20*/ MatAssemblyBegin_MPIAIJ, 2617 MatAssemblyEnd_MPIAIJ, 2618 MatSetOption_MPIAIJ, 2619 MatZeroEntries_MPIAIJ, 2620 /*24*/ MatZeroRows_MPIAIJ, 2621 0, 2622 0, 2623 0, 2624 0, 2625 /*29*/ MatSetUp_MPIAIJ, 2626 0, 2627 0, 2628 MatGetDiagonalBlock_MPIAIJ, 2629 0, 2630 /*34*/ MatDuplicate_MPIAIJ, 2631 0, 2632 0, 2633 0, 2634 0, 2635 /*39*/ MatAXPY_MPIAIJ, 2636 MatCreateSubMatrices_MPIAIJ, 2637 MatIncreaseOverlap_MPIAIJ, 2638 MatGetValues_MPIAIJ, 2639 MatCopy_MPIAIJ, 2640 /*44*/ MatGetRowMax_MPIAIJ, 2641 MatScale_MPIAIJ, 2642 MatShift_MPIAIJ, 2643 MatDiagonalSet_MPIAIJ, 2644 MatZeroRowsColumns_MPIAIJ, 2645 /*49*/ MatSetRandom_MPIAIJ, 2646 0, 2647 0, 2648 0, 2649 0, 2650 /*54*/ MatFDColoringCreate_MPIXAIJ, 2651 0, 2652 MatSetUnfactored_MPIAIJ, 2653 MatPermute_MPIAIJ, 2654 0, 2655 /*59*/ MatCreateSubMatrix_MPIAIJ, 2656 MatDestroy_MPIAIJ, 2657 MatView_MPIAIJ, 2658 0, 2659 0, 2660 /*64*/ 0, 2661 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2662 0, 2663 0, 2664 0, 2665 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2666 MatGetRowMinAbs_MPIAIJ, 2667 0, 2668 0, 2669 0, 2670 0, 2671 /*75*/ MatFDColoringApply_AIJ, 2672 MatSetFromOptions_MPIAIJ, 2673 0, 2674 0, 2675 MatFindZeroDiagonals_MPIAIJ, 2676 /*80*/ 0, 2677 0, 2678 0, 2679 /*83*/ MatLoad_MPIAIJ, 2680 MatIsSymmetric_MPIAIJ, 2681 0, 2682 0, 2683 0, 2684 0, 2685 /*89*/ 0, 2686 0, 2687 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2688 0, 2689 0, 2690 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2691 0, 2692 0, 2693 0, 2694 MatBindToCPU_MPIAIJ, 2695 /*99*/ MatProductSetFromOptions_MPIAIJ, 2696 0, 2697 0, 2698 MatConjugate_MPIAIJ, 2699 0, 2700 /*104*/MatSetValuesRow_MPIAIJ, 2701 MatRealPart_MPIAIJ, 2702 MatImaginaryPart_MPIAIJ, 2703 0, 2704 0, 2705 /*109*/0, 2706 0, 2707 MatGetRowMin_MPIAIJ, 2708 0, 2709 MatMissingDiagonal_MPIAIJ, 2710 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2711 0, 2712 MatGetGhosts_MPIAIJ, 2713 0, 2714 0, 2715 /*119*/0, 2716 0, 2717 0, 2718 0, 2719 MatGetMultiProcBlock_MPIAIJ, 2720 /*124*/MatFindNonzeroRows_MPIAIJ, 2721 MatGetColumnNorms_MPIAIJ, 2722 MatInvertBlockDiagonal_MPIAIJ, 2723 MatInvertVariableBlockDiagonal_MPIAIJ, 2724 MatCreateSubMatricesMPI_MPIAIJ, 2725 /*129*/0, 2726 0, 2727 0, 2728 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2729 0, 2730 /*134*/0, 2731 0, 2732 0, 2733 0, 2734 0, 2735 /*139*/MatSetBlockSizes_MPIAIJ, 2736 0, 2737 0, 2738 MatFDColoringSetUp_MPIXAIJ, 2739 MatFindOffBlockDiagonalEntries_MPIAIJ, 2740 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2741 /*145*/0, 2742 0, 2743 0 2744 }; 2745 2746 /* 
----------------------------------------------------------------------------------------*/ 2747 2748 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2749 { 2750 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2751 PetscErrorCode ierr; 2752 2753 PetscFunctionBegin; 2754 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2755 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2756 PetscFunctionReturn(0); 2757 } 2758 2759 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2760 { 2761 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2762 PetscErrorCode ierr; 2763 2764 PetscFunctionBegin; 2765 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2766 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2767 PetscFunctionReturn(0); 2768 } 2769 2770 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2771 { 2772 Mat_MPIAIJ *b; 2773 PetscErrorCode ierr; 2774 PetscMPIInt size; 2775 2776 PetscFunctionBegin; 2777 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2778 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2779 b = (Mat_MPIAIJ*)B->data; 2780 2781 #if defined(PETSC_USE_CTABLE) 2782 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2783 #else 2784 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2785 #endif 2786 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2787 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2788 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2789 2790 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2791 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 2792 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2793 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2794 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? 
B->cmap->N : 0);CHKERRQ(ierr); 2795 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2796 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2797 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2798 2799 if (!B->preallocated) { 2800 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2801 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2802 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2803 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2804 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2805 } 2806 2807 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2808 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2809 B->preallocated = PETSC_TRUE; 2810 B->was_assembled = PETSC_FALSE; 2811 B->assembled = PETSC_FALSE; 2812 2813 /* Set inode option */ 2814 if (b->inode_setoption) { 2815 ierr = MatSetOption(b->A,MAT_USE_INODES,b->inode_use);CHKERRQ(ierr); 2816 ierr = MatSetOption(b->B,MAT_USE_INODES,b->inode_use);CHKERRQ(ierr); 2817 } 2818 PetscFunctionReturn(0); 2819 } 2820 2821 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2822 { 2823 Mat_MPIAIJ *b; 2824 PetscErrorCode ierr; 2825 2826 PetscFunctionBegin; 2827 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2828 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2829 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2830 b = (Mat_MPIAIJ*)B->data; 2831 2832 #if defined(PETSC_USE_CTABLE) 2833 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2834 #else 2835 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2836 #endif 2837 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2838 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2839 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2840 2841 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2842 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2843 B->preallocated = PETSC_TRUE; 2844 B->was_assembled = PETSC_FALSE; 2845 B->assembled = PETSC_FALSE; 2846 PetscFunctionReturn(0); 2847 } 2848 2849 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2850 { 2851 Mat mat; 2852 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2853 PetscErrorCode ierr; 2854 2855 PetscFunctionBegin; 2856 *newmat = 0; 2857 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2858 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2859 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2860 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2861 a = (Mat_MPIAIJ*)mat->data; 2862 2863 mat->factortype = matin->factortype; 2864 mat->assembled = matin->assembled; 2865 mat->insertmode = NOT_SET_VALUES; 2866 mat->preallocated = matin->preallocated; 2867 2868 a->size = oldmat->size; 2869 a->rank = oldmat->rank; 2870 a->donotstash = oldmat->donotstash; 2871 a->roworiented = oldmat->roworiented; 2872 a->rowindices = NULL; 2873 a->rowvalues = NULL; 2874 a->getrowactive = PETSC_FALSE; 2875 2876 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2877 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2878 2879 if (oldmat->colmap) { 2880 #if defined(PETSC_USE_CTABLE) 2881 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2882 #else 2883 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2884 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2885 ierr = 
PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr); 2886 #endif 2887 } else a->colmap = NULL; 2888 if (oldmat->garray) { 2889 PetscInt len; 2890 len = oldmat->B->cmap->n; 2891 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2892 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2893 if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); } 2894 } else a->garray = NULL; 2895 2896 /* It may happen MatDuplicate is called with a non-assembled matrix 2897 In fact, MatDuplicate only requires the matrix to be preallocated 2898 This may happen inside a DMCreateMatrix_Shell */ 2899 if (oldmat->lvec) { 2900 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2901 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2902 } 2903 if (oldmat->Mvctx) { 2904 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2905 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2906 } 2907 if (oldmat->Mvctx_mpi1) { 2908 ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr); 2909 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr); 2910 } 2911 2912 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2913 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2914 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2915 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2916 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2917 *newmat = mat; 2918 PetscFunctionReturn(0); 2919 } 2920 2921 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2922 { 2923 PetscBool isbinary, ishdf5; 2924 PetscErrorCode ierr; 2925 2926 PetscFunctionBegin; 2927 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 2928 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 2929 /* force binary viewer to load .info file if it has not yet done so */ 2930 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2931 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 2932 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr); 2933 if (isbinary) { 2934 ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr); 2935 } else if (ishdf5) { 2936 #if defined(PETSC_HAVE_HDF5) 2937 ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr); 2938 #else 2939 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 2940 #endif 2941 } else { 2942 SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 2943 } 2944 PetscFunctionReturn(0); 2945 } 2946 2947 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 2948 { 2949 PetscInt header[4],M,N,m,nz,rows,cols,sum,i; 2950 PetscInt *rowidxs,*colidxs; 2951 PetscScalar *matvals; 2952 PetscErrorCode ierr; 2953 2954 PetscFunctionBegin; 2955 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2956 2957 /* read in matrix header */ 2958 ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr); 2959 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 2960 M = header[1]; N = header[2]; nz = header[3]; 2961 if (M < 0) 
SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M); 2962 if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N); 2963 if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ"); 2964 2965 /* set block sizes from the viewer's .info file */ 2966 ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 2967 /* set global sizes if not set already */ 2968 if (mat->rmap->N < 0) mat->rmap->N = M; 2969 if (mat->cmap->N < 0) mat->cmap->N = N; 2970 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 2971 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 2972 2973 /* check if the matrix sizes are correct */ 2974 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2975 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols); 2976 2977 /* read in row lengths and build row indices */ 2978 ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr); 2979 ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr); 2980 ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr); 2981 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 2982 ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr); 2983 if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum); 2984 /* read in column indices and matrix values */ 2985 ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr); 2986 ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 2987 ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 2988 /* store matrix indices and values */ 2989 ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr); 2990 ierr = PetscFree(rowidxs);CHKERRQ(ierr); 2991 ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr); 2992 PetscFunctionReturn(0); 2993 } 2994 2995 /* Not scalable because of ISAllGather() unless getting all columns. 
*/ 2996 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 2997 { 2998 PetscErrorCode ierr; 2999 IS iscol_local; 3000 PetscBool isstride; 3001 PetscMPIInt lisstride=0,gisstride; 3002 3003 PetscFunctionBegin; 3004 /* check if we are grabbing all columns */ 3005 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3006 3007 if (isstride) { 3008 PetscInt start,len,mstart,mlen; 3009 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3010 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3011 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3012 if (mstart == start && mlen-mstart == len) lisstride = 1; 3013 } 3014 3015 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3016 if (gisstride) { 3017 PetscInt N; 3018 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3019 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr); 3020 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3021 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3022 } else { 3023 PetscInt cbs; 3024 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3025 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3026 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3027 } 3028 3029 *isseq = iscol_local; 3030 PetscFunctionReturn(0); 3031 } 3032 3033 /* 3034 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and the global size of iscol_local 3035 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3036 3037 Input Parameters: 3038 mat - matrix 3039 isrow - parallel row index set; its local indices are a subset of the local rows of mat, 3040 i.e., mat->rstart <= isrow[i] < mat->rend 3041 iscol - parallel column index set; its local indices are a subset of the local columns of mat, 3042 i.e., mat->cstart <= iscol[i] < mat->cend 3043 Output Parameters: 3044 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3045 iscol_o - sequential column index set for retrieving mat->B 3046 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3047 */ 3048 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3049 { 3050 PetscErrorCode ierr; 3051 Vec x,cmap; 3052 const PetscInt *is_idx; 3053 PetscScalar *xarray,*cmaparray; 3054 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3055 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3056 Mat B=a->B; 3057 Vec lvec=a->lvec,lcmap; 3058 PetscInt i,cstart,cend,Bn=B->cmap->N; 3059 MPI_Comm comm; 3060 VecScatter Mvctx=a->Mvctx; 3061 3062 PetscFunctionBegin; 3063 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3064 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3065 3066 /* (1) iscol is a sub-column vector of mat, pad it with '-1.'
to form a full vector x */ 3067 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3068 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3069 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3070 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3071 3072 /* Get start indices */ 3073 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3074 isstart -= ncols; 3075 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3076 3077 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3078 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3079 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3080 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3081 for (i=0; i<ncols; i++) { 3082 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3083 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3084 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3085 } 3086 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3087 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3088 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3089 3090 /* Get iscol_d */ 3091 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3092 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3093 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3094 3095 /* Get isrow_d */ 3096 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3097 rstart = mat->rmap->rstart; 3098 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3099 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3100 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3101 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3102 3103 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3104 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3105 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3106 3107 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3108 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3109 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3110 3111 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3112 3113 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3114 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3115 3116 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3117 /* off-process column indices */ 3118 count = 0; 3119 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3120 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3121 3122 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3123 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3124 for (i=0; i<Bn; i++) { 3125 if (PetscRealPart(xarray[i]) > -1.0) { 3126 idx[count] = i; /* local column index in off-diagonal part B */ 3127 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3128 count++; 3129 } 3130 } 3131 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3132 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3133 3134 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3135 /* cannot ensure iscol_o has same blocksize as iscol! 
*/ 3136 3137 ierr = PetscFree(idx);CHKERRQ(ierr); 3138 *garray = cmap1; 3139 3140 ierr = VecDestroy(&x);CHKERRQ(ierr); 3141 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3142 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3143 PetscFunctionReturn(0); 3144 } 3145 3146 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3147 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3148 { 3149 PetscErrorCode ierr; 3150 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3151 Mat M = NULL; 3152 MPI_Comm comm; 3153 IS iscol_d,isrow_d,iscol_o; 3154 Mat Asub = NULL,Bsub = NULL; 3155 PetscInt n; 3156 3157 PetscFunctionBegin; 3158 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3159 3160 if (call == MAT_REUSE_MATRIX) { 3161 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3162 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3163 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3164 3165 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3166 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3167 3168 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3169 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3170 3171 /* Update diagonal and off-diagonal portions of submat */ 3172 asub = (Mat_MPIAIJ*)(*submat)->data; 3173 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3174 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3175 if (n) { 3176 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3177 } 3178 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3179 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3180 3181 } else { /* call == MAT_INITIAL_MATRIX) */ 3182 const PetscInt *garray; 3183 PetscInt BsubN; 3184 3185 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3186 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3187 3188 /* Create local submatrices Asub and Bsub */ 3189 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3190 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3191 3192 /* Create submatrix M */ 3193 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3194 3195 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3196 asub = (Mat_MPIAIJ*)M->data; 3197 3198 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3199 n = asub->B->cmap->N; 3200 if (BsubN > n) { 3201 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3202 const PetscInt *idx; 3203 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3204 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3205 3206 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3207 j = 0; 3208 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3209 for (i=0; i<n; i++) { 3210 if (j >= BsubN) break; 3211 while (subgarray[i] > garray[j]) j++; 3212 3213 if (subgarray[i] == garray[j]) { 3214 idx_new[i] = idx[j++]; 3215 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be smaller than garray[%D]=%D",i,subgarray[i],j,garray[j]); 3216 } 3217 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3218 3219 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3220 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3221 3222 } else if (BsubN < n) { 3223 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub %D cannot be smaller than columns of B %D",BsubN,asub->B->cmap->N); 3224 } 3225 3226 ierr = PetscFree(garray);CHKERRQ(ierr); 3227 *submat = M; 3228 3229 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3230 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3231 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3232 3233 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3234 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3235 3236 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3237 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3238 } 3239 PetscFunctionReturn(0); 3240 } 3241 3242 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3243 { 3244 PetscErrorCode ierr; 3245 IS iscol_local=NULL,isrow_d; 3246 PetscInt csize; 3247 PetscInt n,i,j,start,end; 3248 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3249 MPI_Comm comm; 3250 3251 PetscFunctionBegin; 3252 /* If isrow has same processor distribution as mat, 3253 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3254 if (call == MAT_REUSE_MATRIX) { 3255 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3256 if (isrow_d) { 3257 sameRowDist = PETSC_TRUE; 3258 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3259 } else { 3260 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3261 if (iscol_local) { 3262 sameRowDist = PETSC_TRUE; 3263 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3264 } 3265 } 3266 } else { 3267 /* Check if isrow has same processor distribution as mat */ 3268 sameDist[0] =
PETSC_FALSE; 3269 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3270 if (!n) { 3271 sameDist[0] = PETSC_TRUE; 3272 } else { 3273 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3274 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3275 if (i >= start && j < end) { 3276 sameDist[0] = PETSC_TRUE; 3277 } 3278 } 3279 3280 /* Check if iscol has same processor distribution as mat */ 3281 sameDist[1] = PETSC_FALSE; 3282 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3283 if (!n) { 3284 sameDist[1] = PETSC_TRUE; 3285 } else { 3286 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3287 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3288 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3289 } 3290 3291 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3292 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3293 sameRowDist = tsameDist[0]; 3294 } 3295 3296 if (sameRowDist) { 3297 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3298 /* isrow and iscol have same processor distribution as mat */ 3299 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3300 PetscFunctionReturn(0); 3301 } else { /* sameRowDist */ 3302 /* isrow has same processor distribution as mat */ 3303 if (call == MAT_INITIAL_MATRIX) { 3304 PetscBool sorted; 3305 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3306 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3307 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3308 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3309 3310 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3311 if (sorted) { 3312 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3313 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3314 PetscFunctionReturn(0); 3315 } 3316 } else { /* call == MAT_REUSE_MATRIX */ 3317 IS iscol_sub; 3318 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3319 if (iscol_sub) { 3320 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3321 PetscFunctionReturn(0); 3322 } 3323 } 3324 } 3325 } 3326 3327 /* General case: iscol -> iscol_local which has global size of iscol */ 3328 if (call == MAT_REUSE_MATRIX) { 3329 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3330 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3331 } else { 3332 if (!iscol_local) { 3333 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3334 } 3335 } 3336 3337 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3338 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3339 3340 if (call == MAT_INITIAL_MATRIX) { 3341 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3342 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3343 } 3344 PetscFunctionReturn(0); 3345 } 3346 3347 /*@C 3348 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3349 and "off-diagonal" part of the matrix in CSR format. 3350 3351 Collective 3352 3353 Input Parameters: 3354 + comm - MPI communicator 3355 . 
A - "diagonal" portion of matrix 3356 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3357 - garray - global index of B columns 3358 3359 Output Parameter: 3360 . mat - the matrix, with input A as its local diagonal matrix 3361 Level: advanced 3362 3363 Notes: 3364 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3365 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3366 3367 .seealso: MatCreateMPIAIJWithSplitArrays() 3368 @*/ 3369 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3370 { 3371 PetscErrorCode ierr; 3372 Mat_MPIAIJ *maij; 3373 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3374 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3375 PetscScalar *oa=b->a; 3376 Mat Bnew; 3377 PetscInt m,n,N; 3378 3379 PetscFunctionBegin; 3380 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3381 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3382 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3383 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3384 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3385 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3386 3387 /* Get global columns of mat */ 3388 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3389 3390 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3391 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3392 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3393 maij = (Mat_MPIAIJ*)(*mat)->data; 3394 3395 (*mat)->preallocated = PETSC_TRUE; 3396 3397 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3398 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3399 3400 /* Set A as diagonal portion of *mat */ 3401 maij->A = A; 3402 3403 nz = oi[m]; 3404 for (i=0; i<nz; i++) { 3405 col = oj[i]; 3406 oj[i] = garray[col]; 3407 } 3408 3409 /* Set Bnew as off-diagonal portion of *mat */ 3410 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3411 bnew = (Mat_SeqAIJ*)Bnew->data; 3412 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3413 maij->B = Bnew; 3414 3415 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3416 3417 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3418 b->free_a = PETSC_FALSE; 3419 b->free_ij = PETSC_FALSE; 3420 ierr = MatDestroy(&B);CHKERRQ(ierr); 3421 3422 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3423 bnew->free_a = PETSC_TRUE; 3424 bnew->free_ij = PETSC_TRUE; 3425 3426 /* condense columns of maij->B */ 3427 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3428 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3429 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3430 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3431 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3432 PetscFunctionReturn(0); 3433 } 3434 3435 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3436 
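/*
   A minimal usage sketch for MatCreateMPIAIJWithSeqAIJ() above (illustrative only; Asub, Bsub,
   garray and comm are assumed to have been built beforehand, for example with
   MatCreateSubMatrix_SeqAIJ() and ISGetSeqIS_SameColDist_Private() as in
   MatCreateSubMatrix_MPIAIJ_SameRowColDist()):

     Mat            Asub,Bsub,M;
     const PetscInt *garray;
     MPI_Comm       comm;

     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);

   After the call Asub is owned by M and Bsub has already been destroyed, so the caller must not
   use or destroy either of them.
*/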
3437 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3438 { 3439 PetscErrorCode ierr; 3440 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3441 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3442 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3443 Mat M,Msub,B=a->B; 3444 MatScalar *aa; 3445 Mat_SeqAIJ *aij; 3446 PetscInt *garray = a->garray,*colsub,Ncols; 3447 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3448 IS iscol_sub,iscmap; 3449 const PetscInt *is_idx,*cmap; 3450 PetscBool allcolumns=PETSC_FALSE; 3451 MPI_Comm comm; 3452 3453 PetscFunctionBegin; 3454 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3455 3456 if (call == MAT_REUSE_MATRIX) { 3457 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3458 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3459 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3460 3461 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3462 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3463 3464 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3465 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3466 3467 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3468 3469 } else { /* call == MAT_INITIAL_MATRIX) */ 3470 PetscBool flg; 3471 3472 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3473 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3474 3475 /* (1) iscol -> nonscalable iscol_local */ 3476 /* Check for special case: each processor gets entire matrix columns */ 3477 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3478 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3479 if (allcolumns) { 3480 iscol_sub = iscol_local; 3481 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3482 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3483 3484 } else { 3485 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3486 PetscInt *idx,*cmap1,k; 3487 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3488 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3489 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3490 count = 0; 3491 k = 0; 3492 for (i=0; i<Ncols; i++) { 3493 j = is_idx[i]; 3494 if (j >= cstart && j < cend) { 3495 /* diagonal part of mat */ 3496 idx[count] = j; 3497 cmap1[count++] = i; /* column index in submat */ 3498 } else if (Bn) { 3499 /* off-diagonal part of mat */ 3500 if (j == garray[k]) { 3501 idx[count] = j; 3502 cmap1[count++] = i; /* column index in submat */ 3503 } else if (j > garray[k]) { 3504 while (j > garray[k] && k < Bn-1) k++; 3505 if (j == garray[k]) { 3506 idx[count] = j; 3507 cmap1[count++] = i; /* column index in submat */ 3508 } 3509 } 3510 } 3511 } 3512 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3513 3514 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3515 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3516 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3517 3518 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3519 } 3520 3521 /* (3) Create sequential Msub */ 3522 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3523 } 3524 3525 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3526 aij = (Mat_SeqAIJ*)(Msub)->data; 3527 ii = aij->i; 3528 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3529 3530 /* 3531 m - number of local rows 3532 Ncols - number of columns (same on all processors) 3533 rstart - first row in new global matrix generated 3534 */ 3535 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3536 3537 if (call == MAT_INITIAL_MATRIX) { 3538 /* (4) Create parallel newmat */ 3539 PetscMPIInt rank,size; 3540 PetscInt csize; 3541 3542 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3543 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3544 3545 /* 3546 Determine the number of non-zeros in the diagonal and off-diagonal 3547 portions of the matrix in order to do correct preallocation 3548 */ 3549 3550 /* first get start and end of "diagonal" columns */ 3551 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3552 if (csize == PETSC_DECIDE) { 3553 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3554 if (mglobal == Ncols) { /* square matrix */ 3555 nlocal = m; 3556 } else { 3557 nlocal = Ncols/size + ((Ncols % size) > rank); 3558 } 3559 } else { 3560 nlocal = csize; 3561 } 3562 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3563 rstart = rend - nlocal; 3564 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3565 3566 /* next, compute all the lengths */ 3567 jj = aij->j; 3568 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3569 olens = dlens + m; 3570 for (i=0; i<m; i++) { 3571 jend = ii[i+1] - ii[i]; 3572 olen = 0; 3573 dlen = 0; 3574 for (j=0; j<jend; j++) { 3575 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3576 else dlen++; 3577 jj++; 3578 } 3579 olens[i] = olen; 3580 dlens[i] = dlen; 3581 } 3582 3583 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3584 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3585 3586 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3587 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 
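/* dlens/olens computed above are exact per-row counts of the diagonal and off-diagonal nonzeros of the new matrix, so the preallocation below should not need extra mallocs during MatSetValues_MPIAIJ() */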
3588 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3589 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3590 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3591 ierr = PetscFree(dlens);CHKERRQ(ierr); 3592 3593 } else { /* call == MAT_REUSE_MATRIX */ 3594 M = *newmat; 3595 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3596 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3597 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3598 /* 3599 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3600 rather than the slower MatSetValues(). 3601 */ 3602 M->was_assembled = PETSC_TRUE; 3603 M->assembled = PETSC_FALSE; 3604 } 3605 3606 /* (5) Set values of Msub to *newmat */ 3607 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3608 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3609 3610 jj = aij->j; 3611 aa = aij->a; 3612 for (i=0; i<m; i++) { 3613 row = rstart + i; 3614 nz = ii[i+1] - ii[i]; 3615 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3616 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3617 jj += nz; aa += nz; 3618 } 3619 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3620 3621 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3622 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3623 3624 ierr = PetscFree(colsub);CHKERRQ(ierr); 3625 3626 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3627 if (call == MAT_INITIAL_MATRIX) { 3628 *newmat = M; 3629 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3630 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3631 3632 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3633 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3634 3635 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3636 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3637 3638 if (iscol_local) { 3639 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3640 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3641 } 3642 } 3643 PetscFunctionReturn(0); 3644 } 3645 3646 /* 3647 Not great since it makes two copies of the submatrix, first an SeqAIJ 3648 in local and then by concatenating the local matrices the end result. 3649 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3650 3651 Note: This requires a sequential iscol with all indices. 
3652 */ 3653 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3654 { 3655 PetscErrorCode ierr; 3656 PetscMPIInt rank,size; 3657 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3658 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3659 Mat M,Mreuse; 3660 MatScalar *aa,*vwork; 3661 MPI_Comm comm; 3662 Mat_SeqAIJ *aij; 3663 PetscBool colflag,allcolumns=PETSC_FALSE; 3664 3665 PetscFunctionBegin; 3666 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3667 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3668 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3669 3670 /* Check for special case: each processor gets entire matrix columns */ 3671 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3672 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3673 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3674 3675 if (call == MAT_REUSE_MATRIX) { 3676 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3677 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3678 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3679 } else { 3680 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3681 } 3682 3683 /* 3684 m - number of local rows 3685 n - number of columns (same on all processors) 3686 rstart - first row in new global matrix generated 3687 */ 3688 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3689 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3690 if (call == MAT_INITIAL_MATRIX) { 3691 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3692 ii = aij->i; 3693 jj = aij->j; 3694 3695 /* 3696 Determine the number of non-zeros in the diagonal and off-diagonal 3697 portions of the matrix in order to do correct preallocation 3698 */ 3699 3700 /* first get start and end of "diagonal" columns */ 3701 if (csize == PETSC_DECIDE) { 3702 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3703 if (mglobal == n) { /* square matrix */ 3704 nlocal = m; 3705 } else { 3706 nlocal = n/size + ((n % size) > rank); 3707 } 3708 } else { 3709 nlocal = csize; 3710 } 3711 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3712 rstart = rend - nlocal; 3713 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3714 3715 /* next, compute all the lengths */ 3716 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3717 olens = dlens + m; 3718 for (i=0; i<m; i++) { 3719 jend = ii[i+1] - ii[i]; 3720 olen = 0; 3721 dlen = 0; 3722 for (j=0; j<jend; j++) { 3723 if (*jj < rstart || *jj >= rend) olen++; 3724 else dlen++; 3725 jj++; 3726 } 3727 olens[i] = olen; 3728 dlens[i] = dlen; 3729 } 3730 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3731 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3732 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3733 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3734 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3735 ierr = PetscFree(dlens);CHKERRQ(ierr); 3736 } else { 3737 PetscInt ml,nl; 3738 3739 M = *newmat; 3740 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3741 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3742 ierr = 
MatZeroEntries(M);CHKERRQ(ierr); 3743 /* 3744 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3745 rather than the slower MatSetValues(). 3746 */ 3747 M->was_assembled = PETSC_TRUE; 3748 M->assembled = PETSC_FALSE; 3749 } 3750 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3751 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3752 ii = aij->i; 3753 jj = aij->j; 3754 aa = aij->a; 3755 for (i=0; i<m; i++) { 3756 row = rstart + i; 3757 nz = ii[i+1] - ii[i]; 3758 cwork = jj; jj += nz; 3759 vwork = aa; aa += nz; 3760 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3761 } 3762 3763 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3764 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3765 *newmat = M; 3766 3767 /* save submatrix used in processor for next request */ 3768 if (call == MAT_INITIAL_MATRIX) { 3769 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3770 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3771 } 3772 PetscFunctionReturn(0); 3773 } 3774 3775 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3776 { 3777 PetscInt m,cstart, cend,j,nnz,i,d; 3778 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3779 const PetscInt *JJ; 3780 PetscErrorCode ierr; 3781 PetscBool nooffprocentries; 3782 3783 PetscFunctionBegin; 3784 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]); 3785 3786 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3787 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3788 m = B->rmap->n; 3789 cstart = B->cmap->rstart; 3790 cend = B->cmap->rend; 3791 rstart = B->rmap->rstart; 3792 3793 ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3794 3795 if (PetscDefined(USE_DEBUG)) { 3796 for (i=0; i<m; i++) { 3797 nnz = Ii[i+1]- Ii[i]; 3798 JJ = J + Ii[i]; 3799 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz); 3800 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]); 3801 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3802 } 3803 } 3804 3805 for (i=0; i<m; i++) { 3806 nnz = Ii[i+1]- Ii[i]; 3807 JJ = J + Ii[i]; 3808 nnz_max = PetscMax(nnz_max,nnz); 3809 d = 0; 3810 for (j=0; j<nnz; j++) { 3811 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3812 } 3813 d_nnz[i] = d; 3814 o_nnz[i] = nnz - d; 3815 } 3816 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3817 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3818 3819 for (i=0; i<m; i++) { 3820 ii = i + rstart; 3821 ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr); 3822 } 3823 nooffprocentries = B->nooffprocentries; 3824 B->nooffprocentries = PETSC_TRUE; 3825 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3826 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3827 B->nooffprocentries = nooffprocentries; 3828 3829 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3830 PetscFunctionReturn(0); 3831 } 3832 3833 /*@ 3834 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3835 (the default parallel PETSc format).
3836 3837 Collective 3838 3839 Input Parameters: 3840 + B - the matrix 3841 . i - the indices into j for the start of each local row (starts with zero) 3842 . j - the column indices for each local row (starts with zero) 3843 - v - optional values in the matrix 3844 3845 Level: developer 3846 3847 Notes: 3848 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3849 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3850 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3851 3852 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3853 3854 The format which is used for the sparse matrix input, is equivalent to a 3855 row-major ordering.. i.e for the following matrix, the input data expected is 3856 as shown 3857 3858 $ 1 0 0 3859 $ 2 0 3 P0 3860 $ ------- 3861 $ 4 5 6 P1 3862 $ 3863 $ Process0 [P0]: rows_owned=[0,1] 3864 $ i = {0,1,3} [size = nrow+1 = 2+1] 3865 $ j = {0,0,2} [size = 3] 3866 $ v = {1,2,3} [size = 3] 3867 $ 3868 $ Process1 [P1]: rows_owned=[2] 3869 $ i = {0,3} [size = nrow+1 = 1+1] 3870 $ j = {0,1,2} [size = 3] 3871 $ v = {4,5,6} [size = 3] 3872 3873 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 3874 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3875 @*/ 3876 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3877 { 3878 PetscErrorCode ierr; 3879 3880 PetscFunctionBegin; 3881 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3882 PetscFunctionReturn(0); 3883 } 3884 3885 /*@C 3886 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3887 (the default parallel PETSc format). For good matrix assembly performance 3888 the user should preallocate the matrix storage by setting the parameters 3889 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3890 performance can be increased by more than a factor of 50. 3891 3892 Collective 3893 3894 Input Parameters: 3895 + B - the matrix 3896 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3897 (same value is used for all local rows) 3898 . d_nnz - array containing the number of nonzeros in the various rows of the 3899 DIAGONAL portion of the local submatrix (possibly different for each row) 3900 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3901 The size of this array is equal to the number of local rows, i.e 'm'. 3902 For matrices that will be factored, you must leave room for (and set) 3903 the diagonal entry even if it is zero. 3904 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3905 submatrix (same value is used for all local rows). 3906 - o_nnz - array containing the number of nonzeros in the various rows of the 3907 OFF-DIAGONAL portion of the local submatrix (possibly different for 3908 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 3909 structure. The size of this array is equal to the number 3910 of local rows, i.e 'm'. 
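   A typical calling sequence is sketched below (a minimal sketch; the values 3 and 2 are
   illustrative placeholders for per-row nonzero counts and must be chosen from the actual
   sparsity pattern):
.vb
      MatCreate(comm,&B);
      MatSetSizes(B,m,n,M,N);
      MatSetType(B,MATAIJ);
      MatSetFromOptions(B);
      MatMPIAIJSetPreallocation(B,3,NULL,2,NULL);
      MatSeqAIJSetPreallocation(B,3,NULL);
.ve
   Only the preallocation call that matches the actual matrix type takes effect; the other one
   is silently ignored, so the same code runs on any number of processes.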
3911 3912 If the *_nnz parameter is given then the *_nz parameter is ignored 3913 3914 The AIJ format (also called the Yale sparse matrix format or 3915 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3916 storage. The stored row and column indices begin with zero. 3917 See Users-Manual: ch_mat for details. 3918 3919 The parallel matrix is partitioned such that the first m0 rows belong to 3920 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3921 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3922 3923 The DIAGONAL portion of the local submatrix of a processor can be defined 3924 as the submatrix which is obtained by extraction the part corresponding to 3925 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3926 first row that belongs to the processor, r2 is the last row belonging to 3927 the this processor, and c1-c2 is range of indices of the local part of a 3928 vector suitable for applying the matrix to. This is an mxn matrix. In the 3929 common case of a square matrix, the row and column ranges are the same and 3930 the DIAGONAL part is also square. The remaining portion of the local 3931 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3932 3933 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3934 3935 You can call MatGetInfo() to get information on how effective the preallocation was; 3936 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3937 You can also run with the option -info and look for messages with the string 3938 malloc in them to see if additional memory allocation was needed. 3939 3940 Example usage: 3941 3942 Consider the following 8x8 matrix with 34 non-zero values, that is 3943 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3944 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3945 as follows: 3946 3947 .vb 3948 1 2 0 | 0 3 0 | 0 4 3949 Proc0 0 5 6 | 7 0 0 | 8 0 3950 9 0 10 | 11 0 0 | 12 0 3951 ------------------------------------- 3952 13 0 14 | 15 16 17 | 0 0 3953 Proc1 0 18 0 | 19 20 21 | 0 0 3954 0 0 0 | 22 23 0 | 24 0 3955 ------------------------------------- 3956 Proc2 25 26 27 | 0 0 28 | 29 0 3957 30 0 0 | 31 32 33 | 0 34 3958 .ve 3959 3960 This can be represented as a collection of submatrices as: 3961 3962 .vb 3963 A B C 3964 D E F 3965 G H I 3966 .ve 3967 3968 Where the submatrices A,B,C are owned by proc0, D,E,F are 3969 owned by proc1, G,H,I are owned by proc2. 3970 3971 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3972 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3973 The 'M','N' parameters are 8,8, and have the same values on all procs. 3974 3975 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3976 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3977 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3978 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3979 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3980 matrix, ans [DF] as another SeqAIJ matrix. 3981 3982 When d_nz, o_nz parameters are specified, d_nz storage elements are 3983 allocated for every row of the local diagonal submatrix, and o_nz 3984 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3985 One way to choose d_nz and o_nz is to use the max nonzerors per local 3986 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 
3987 In this case, the values of d_nz,o_nz are: 3988 .vb 3989 proc0 : d_nz = 2, o_nz = 2 3990 proc1 : d_nz = 3, o_nz = 2 3991 proc2 : d_nz = 1, o_nz = 4 3992 .ve 3993 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3994 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3995 for proc2, i.e. we are using 12+15+10=37 storage locations to store 3996 34 values. 3997 3998 When d_nnz, o_nnz parameters are specified, the storage is specified 3999 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4000 In the above case the values for d_nnz,o_nnz are: 4001 .vb 4002 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4003 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4004 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4005 .ve 4006 Here the space allocated is the sum of all the above values, i.e. 34, and 4007 hence pre-allocation is perfect. 4008 4009 Level: intermediate 4010 4011 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4012 MATMPIAIJ, MatGetInfo(), PetscSplitOwnership() 4013 @*/ 4014 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4015 { 4016 PetscErrorCode ierr; 4017 4018 PetscFunctionBegin; 4019 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4020 PetscValidType(B,1); 4021 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4022 PetscFunctionReturn(0); 4023 } 4024 4025 /*@ 4026 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local 4027 rows in standard CSR format. 4028 4029 Collective 4030 4031 Input Parameters: 4032 + comm - MPI communicator 4033 . m - number of local rows (Cannot be PETSC_DECIDE) 4034 . n - This value should be the same as the local size used in creating the 4035 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4036 calculated if N is given) For square matrices n is almost always m. 4037 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4038 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4039 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4040 . j - column indices 4041 - a - matrix values 4042 4043 Output Parameter: 4044 . mat - the matrix 4045 4046 Level: intermediate 4047 4048 Notes: 4049 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4050 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4051 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4052 4053 The i and j indices are 0 based, and the i indices are offsets into the local j array. 4054 4055 The format which is used for the sparse matrix input is equivalent to a 4056 row-major ordering,
i.e., for the following matrix, the input data expected is 4057 as shown 4058 4059 Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays() 4060 4061 $ 1 0 0 4062 $ 2 0 3 P0 4063 $ ------- 4064 $ 4 5 6 P1 4065 $ 4066 $ Process0 [P0]: rows_owned=[0,1] 4067 $ i = {0,1,3} [size = nrow+1 = 2+1] 4068 $ j = {0,0,2} [size = 3] 4069 $ v = {1,2,3} [size = 3] 4070 $ 4071 $ Process1 [P1]: rows_owned=[2] 4072 $ i = {0,3} [size = nrow+1 = 1+1] 4073 $ j = {0,1,2} [size = 3] 4074 $ v = {4,5,6} [size = 3] 4075 4076 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4077 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4078 @*/ 4079 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4080 { 4081 PetscErrorCode ierr; 4082 4083 PetscFunctionBegin; 4084 if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4085 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4086 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4087 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4088 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4089 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4090 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4091 PetscFunctionReturn(0); 4092 } 4093 4094 /*@ 4095 MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the local rows in standard 4096 CSR format. Only the numerical values are updated; the other arrays must be identical to those used at creation 4097 4098 Collective 4099 4100 Input Parameters: 4101 + mat - the matrix 4102 . m - number of local rows (Cannot be PETSC_DECIDE) 4103 . n - This value should be the same as the local size used in creating the 4104 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4105 calculated if N is given) For square matrices n is almost always m. 4106 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4107 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4108 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4109 . 
J - column indices 4110 - v - matrix values 4111 4112 Level: intermediate 4113 4114 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4115 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4116 @*/ 4117 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 4118 { 4119 PetscErrorCode ierr; 4120 PetscInt cstart,nnz,i,j; 4121 PetscInt *ld; 4122 PetscBool nooffprocentries; 4123 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)mat->data; 4124 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)Aij->A->data, *Ao = (Mat_SeqAIJ*)Aij->B->data; 4125 PetscScalar *ad = Ad->a, *ao = Ao->a; 4126 const PetscInt *Adi = Ad->i; 4127 PetscInt ldi,Iii,md; 4128 4129 PetscFunctionBegin; 4130 if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii (row indices) must start with 0"); 4131 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4132 if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4133 if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4134 4135 cstart = mat->cmap->rstart; 4136 if (!Aij->ld) { 4137 /* count number of entries below block diagonal */ 4138 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 4139 Aij->ld = ld; 4140 for (i=0; i<m; i++) { 4141 nnz = Ii[i+1]- Ii[i]; 4142 j = 0; 4143 while (j < nnz && J[j] < cstart) {j++;} 4144 J += nnz; 4145 ld[i] = j; 4146 } 4147 } else { 4148 ld = Aij->ld; 4149 } 4150 4151 for (i=0; i<m; i++) { 4152 nnz = Ii[i+1]- Ii[i]; 4153 Iii = Ii[i]; 4154 ldi = ld[i]; 4155 md = Adi[i+1]-Adi[i]; 4156 ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr); 4157 ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr); 4158 ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr); 4159 ad += md; 4160 ao += nnz - md; 4161 } 4162 nooffprocentries = mat->nooffprocentries; 4163 mat->nooffprocentries = PETSC_TRUE; 4164 ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr); 4165 ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr); 4166 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 4167 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4168 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4169 mat->nooffprocentries = nooffprocentries; 4170 PetscFunctionReturn(0); 4171 } 4172 4173 /*@C 4174 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4175 (the default parallel PETSc format). For good matrix assembly performance 4176 the user should preallocate the matrix storage by setting the parameters 4177 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4178 performance can be increased by more than a factor of 50. 4179 4180 Collective 4181 4182 Input Parameters: 4183 + comm - MPI communicator 4184 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4185 This value should be the same as the local size used in creating the 4186 y vector for the matrix-vector product y = Ax. 4187 . n - This value should be the same as the local size used in creating the 4188 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4189 calculated if N is given) For square matrices n is almost always m. 4190 . 
M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4191 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4192 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4193 (same value is used for all local rows) 4194 . d_nnz - array containing the number of nonzeros in the various rows of the 4195 DIAGONAL portion of the local submatrix (possibly different for each row) 4196 or NULL, if d_nz is used to specify the nonzero structure. 4197 The size of this array is equal to the number of local rows, i.e 'm'. 4198 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4199 submatrix (same value is used for all local rows). 4200 - o_nnz - array containing the number of nonzeros in the various rows of the 4201 OFF-DIAGONAL portion of the local submatrix (possibly different for 4202 each row) or NULL, if o_nz is used to specify the nonzero 4203 structure. The size of this array is equal to the number 4204 of local rows, i.e 'm'. 4205 4206 Output Parameter: 4207 . A - the matrix 4208 4209 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4210 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4211 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4212 4213 Notes: 4214 If the *_nnz parameter is given then the *_nz parameter is ignored 4215 4216 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4217 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4218 storage requirements for this matrix. 4219 4220 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4221 processor than it must be used on all processors that share the object for 4222 that argument. 4223 4224 The user MUST specify either the local or global matrix dimensions 4225 (possibly both). 4226 4227 The parallel matrix is partitioned across processors such that the 4228 first m0 rows belong to process 0, the next m1 rows belong to 4229 process 1, the next m2 rows belong to process 2 etc.. where 4230 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4231 values corresponding to [m x N] submatrix. 4232 4233 The columns are logically partitioned with the n0 columns belonging 4234 to 0th partition, the next n1 columns belonging to the next 4235 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4236 4237 The DIAGONAL portion of the local submatrix on any given processor 4238 is the submatrix corresponding to the rows and columns m,n 4239 corresponding to the given processor. i.e diagonal matrix on 4240 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4241 etc. The remaining portion of the local submatrix [m x (N-n)] 4242 constitute the OFF-DIAGONAL portion. The example below better 4243 illustrates this concept. 4244 4245 For a square global matrix we define each processor's diagonal portion 4246 to be its local rows and the corresponding columns (a square submatrix); 4247 each processor's off-diagonal portion encompasses the remainder of the 4248 local matrix (a rectangular submatrix). 4249 4250 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4251 4252 When calling this routine with a single process communicator, a matrix of 4253 type SEQAIJ is returned. 
If a matrix of type MPIAIJ is desired for this 4254 type of communicator, use the construction mechanism 4255 .vb 4256 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4257 .ve 4258 4264 By default, this format uses inodes (identical nodes) when possible. 4265 We search for consecutive rows with the same nonzero structure, thereby 4266 reusing matrix information to achieve increased efficiency. 4267 4268 Options Database Keys: 4269 + -mat_no_inode - Do not use inodes 4270 - -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4271 4272 4273 4274 Example usage: 4275 4276 Consider the following 8x8 matrix with 34 non-zero values, that is 4277 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 4278 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4279 as follows 4280 4281 .vb 4282 1 2 0 | 0 3 0 | 0 4 4283 Proc0 0 5 6 | 7 0 0 | 8 0 4284 9 0 10 | 11 0 0 | 12 0 4285 ------------------------------------- 4286 13 0 14 | 15 16 17 | 0 0 4287 Proc1 0 18 0 | 19 20 21 | 0 0 4288 0 0 0 | 22 23 0 | 24 0 4289 ------------------------------------- 4290 Proc2 25 26 27 | 0 0 28 | 29 0 4291 30 0 0 | 31 32 33 | 0 34 4292 .ve 4293 4294 This can be represented as a collection of submatrices as 4295 4296 .vb 4297 A B C 4298 D E F 4299 G H I 4300 .ve 4301 4302 Where the submatrices A,B,C are owned by proc0, D,E,F are 4303 owned by proc1, G,H,I are owned by proc2. 4304 4305 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4306 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4307 The 'M','N' parameters are 8,8, and have the same values on all procs. 4308 4309 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4310 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4311 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4312 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4313 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 4314 matrix, and [DF] as another SeqAIJ matrix. 4315 4316 When d_nz, o_nz parameters are specified, d_nz storage elements are 4317 allocated for every row of the local diagonal submatrix, and o_nz 4318 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4319 One way to choose d_nz and o_nz is to use the max nonzeros per local 4320 row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 4321 In this case, the values of d_nz,o_nz are 4322 .vb 4323 proc0 : d_nz = 2, o_nz = 2 4324 proc1 : d_nz = 3, o_nz = 2 4325 proc2 : d_nz = 1, o_nz = 4 4326 .ve 4327 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4328 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4329 for proc2, i.e. we are using 12+15+10=37 storage locations to store 4330 34 values. 4331 4332 When d_nnz, o_nnz parameters are specified, the storage is specified 4333 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4334 In the above case the values for d_nnz,o_nnz are 4335 .vb 4336 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4337 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4338 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4339 .ve 4340 Here the space allocated is the sum of all the above values, i.e. 34, and 4341 hence pre-allocation is perfect.
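   For instance, on the middle process (proc1) of the example above, the corresponding call
   could look like the following sketch (error checking omitted; the other processes pass
   their own m, d_nnz and o_nnz values):
.vb
      PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};
      Mat      A;
      MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve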

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*@C
   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix

   Not collective

   Input Parameter:
.  A - The MPIAIJ matrix

   Output Parameters:
+  Ad - The local diagonal block as a SeqAIJ matrix
.  Ao - The local off-diagonal block as a SeqAIJ matrix
-  colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix

   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
   in Ad are in [0, Nc), where Nc is the number of local columns. The columns of Ao are in [0, Nco), where Nco is
   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
   local column numbers to global column numbers in the original matrix.
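
   For example, the global column indices of the off-diagonal block can be recovered
   as in the following sketch (assuming A is an assembled MATMPIAIJ matrix; error
   checking omitted):
.vb
     Mat            Ad,Ao;
     const PetscInt *colmap;
     MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);
.ve
   Column j of Ao then holds the entries of global column colmap[j] of A.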
4384 4385 Level: intermediate 4386 4387 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAJ, MATSEQAIJ 4388 @*/ 4389 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4390 { 4391 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4392 PetscBool flg; 4393 PetscErrorCode ierr; 4394 4395 PetscFunctionBegin; 4396 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr); 4397 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4398 if (Ad) *Ad = a->A; 4399 if (Ao) *Ao = a->B; 4400 if (colmap) *colmap = a->garray; 4401 PetscFunctionReturn(0); 4402 } 4403 4404 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4405 { 4406 PetscErrorCode ierr; 4407 PetscInt m,N,i,rstart,nnz,Ii; 4408 PetscInt *indx; 4409 PetscScalar *values; 4410 4411 PetscFunctionBegin; 4412 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4413 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4414 PetscInt *dnz,*onz,sum,bs,cbs; 4415 4416 if (n == PETSC_DECIDE) { 4417 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4418 } 4419 /* Check sum(n) = N */ 4420 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4421 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4422 4423 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4424 rstart -= m; 4425 4426 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4427 for (i=0; i<m; i++) { 4428 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4429 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4430 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4431 } 4432 4433 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4434 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4435 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4436 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4437 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4438 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4439 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4440 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4441 } 4442 4443 /* numeric phase */ 4444 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4445 for (i=0; i<m; i++) { 4446 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4447 Ii = i + rstart; 4448 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4449 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4450 } 4451 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4452 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4453 PetscFunctionReturn(0); 4454 } 4455 4456 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4457 { 4458 PetscErrorCode ierr; 4459 PetscMPIInt rank; 4460 PetscInt m,N,i,rstart,nnz; 4461 size_t len; 4462 const PetscInt *indx; 4463 PetscViewer out; 4464 char *name; 4465 Mat B; 4466 const PetscScalar *values; 4467 4468 PetscFunctionBegin; 4469 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4470 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4471 /* Should this be the type of the diagonal block of A? 
*/ 4472 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4473 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4474 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4475 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4476 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4477 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4478 for (i=0; i<m; i++) { 4479 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4480 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4481 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4482 } 4483 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4484 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4485 4486 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4487 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4488 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 4489 sprintf(name,"%s.%d",outfile,rank); 4490 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4491 ierr = PetscFree(name);CHKERRQ(ierr); 4492 ierr = MatView(B,out);CHKERRQ(ierr); 4493 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4494 ierr = MatDestroy(&B);CHKERRQ(ierr); 4495 PetscFunctionReturn(0); 4496 } 4497 4498 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4499 { 4500 PetscErrorCode ierr; 4501 Mat_Merge_SeqsToMPI *merge; 4502 PetscContainer container; 4503 4504 PetscFunctionBegin; 4505 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4506 if (container) { 4507 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4508 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4509 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4510 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4511 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4512 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4513 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4514 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4515 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4516 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4517 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4518 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4519 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4520 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4521 ierr = PetscFree(merge);CHKERRQ(ierr); 4522 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4523 } 4524 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4525 PetscFunctionReturn(0); 4526 } 4527 4528 #include <../src/mat/utils/freespace.h> 4529 #include <petscbt.h> 4530 4531 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4532 { 4533 PetscErrorCode ierr; 4534 MPI_Comm comm; 4535 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4536 PetscMPIInt size,rank,taga,*len_s; 4537 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4538 PetscInt proc,m; 4539 PetscInt **buf_ri,**buf_rj; 4540 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4541 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4542 MPI_Request *s_waits,*r_waits; 4543 MPI_Status *status; 4544 MatScalar *aa=a->a; 4545 MatScalar **abuf_r,*ba_i; 4546 Mat_Merge_SeqsToMPI *merge; 4547 PetscContainer container; 4548 4549 PetscFunctionBegin; 4550 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4551 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4552 4553 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4554 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4555 4556 ierr 
= PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4557 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4558 4559 bi = merge->bi; 4560 bj = merge->bj; 4561 buf_ri = merge->buf_ri; 4562 buf_rj = merge->buf_rj; 4563 4564 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4565 owners = merge->rowmap->range; 4566 len_s = merge->len_s; 4567 4568 /* send and recv matrix values */ 4569 /*-----------------------------*/ 4570 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4571 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4572 4573 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4574 for (proc=0,k=0; proc<size; proc++) { 4575 if (!len_s[proc]) continue; 4576 i = owners[proc]; 4577 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4578 k++; 4579 } 4580 4581 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4582 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4583 ierr = PetscFree(status);CHKERRQ(ierr); 4584 4585 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4586 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4587 4588 /* insert mat values of mpimat */ 4589 /*----------------------------*/ 4590 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4591 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4592 4593 for (k=0; k<merge->nrecv; k++) { 4594 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4595 nrows = *(buf_ri_k[k]); 4596 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4597 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4598 } 4599 4600 /* set values of ba */ 4601 m = merge->rowmap->n; 4602 for (i=0; i<m; i++) { 4603 arow = owners[rank] + i; 4604 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4605 bnzi = bi[i+1] - bi[i]; 4606 ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr); 4607 4608 /* add local non-zero vals of this proc's seqmat into ba */ 4609 anzi = ai[arow+1] - ai[arow]; 4610 aj = a->j + ai[arow]; 4611 aa = a->a + ai[arow]; 4612 nextaj = 0; 4613 for (j=0; nextaj<anzi; j++) { 4614 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4615 ba_i[j] += aa[nextaj++]; 4616 } 4617 } 4618 4619 /* add received vals into ba */ 4620 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4621 /* i-th row */ 4622 if (i == *nextrow[k]) { 4623 anzi = *(nextai[k]+1) - *nextai[k]; 4624 aj = buf_rj[k] + *(nextai[k]); 4625 aa = abuf_r[k] + *(nextai[k]); 4626 nextaj = 0; 4627 for (j=0; nextaj<anzi; j++) { 4628 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4629 ba_i[j] += aa[nextaj++]; 4630 } 4631 } 4632 nextrow[k]++; nextai[k]++; 4633 } 4634 } 4635 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4636 } 4637 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4638 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4639 4640 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4641 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4642 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4643 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4644 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4645 PetscFunctionReturn(0); 4646 } 4647 4648 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt 
m,PetscInt n,Mat *mpimat) 4649 { 4650 PetscErrorCode ierr; 4651 Mat B_mpi; 4652 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4653 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4654 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4655 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4656 PetscInt len,proc,*dnz,*onz,bs,cbs; 4657 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4658 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4659 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4660 MPI_Status *status; 4661 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4662 PetscBT lnkbt; 4663 Mat_Merge_SeqsToMPI *merge; 4664 PetscContainer container; 4665 4666 PetscFunctionBegin; 4667 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4668 4669 /* make sure it is a PETSc comm */ 4670 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4671 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4672 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4673 4674 ierr = PetscNew(&merge);CHKERRQ(ierr); 4675 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4676 4677 /* determine row ownership */ 4678 /*---------------------------------------------------------*/ 4679 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4680 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4681 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4682 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4683 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4684 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4685 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4686 4687 m = merge->rowmap->n; 4688 owners = merge->rowmap->range; 4689 4690 /* determine the number of messages to send, their lengths */ 4691 /*---------------------------------------------------------*/ 4692 len_s = merge->len_s; 4693 4694 len = 0; /* length of buf_si[] */ 4695 merge->nsend = 0; 4696 for (proc=0; proc<size; proc++) { 4697 len_si[proc] = 0; 4698 if (proc == rank) { 4699 len_s[proc] = 0; 4700 } else { 4701 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4702 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4703 } 4704 if (len_s[proc]) { 4705 merge->nsend++; 4706 nrows = 0; 4707 for (i=owners[proc]; i<owners[proc+1]; i++) { 4708 if (ai[i+1] > ai[i]) nrows++; 4709 } 4710 len_si[proc] = 2*(nrows+1); 4711 len += len_si[proc]; 4712 } 4713 } 4714 4715 /* determine the number and length of messages to receive for ij-structure */ 4716 /*-------------------------------------------------------------------------*/ 4717 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4718 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4719 4720 /* post the Irecv of j-structure */ 4721 /*-------------------------------*/ 4722 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4723 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4724 4725 /* post the Isend of j-structure */ 4726 /*--------------------------------*/ 4727 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4728 4729 for (proc=0, k=0; proc<size; proc++) { 4730 if (!len_s[proc]) continue; 4731 i = owners[proc]; 4732 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4733 k++; 4734 } 4735 4736 /* receives and sends 
of j-structure are complete */ 4737 /*------------------------------------------------*/ 4738 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4739 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4740 4741 /* send and recv i-structure */ 4742 /*---------------------------*/ 4743 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4744 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4745 4746 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4747 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4748 for (proc=0,k=0; proc<size; proc++) { 4749 if (!len_s[proc]) continue; 4750 /* form outgoing message for i-structure: 4751 buf_si[0]: nrows to be sent 4752 [1:nrows]: row index (global) 4753 [nrows+1:2*nrows+1]: i-structure index 4754 */ 4755 /*-------------------------------------------*/ 4756 nrows = len_si[proc]/2 - 1; 4757 buf_si_i = buf_si + nrows+1; 4758 buf_si[0] = nrows; 4759 buf_si_i[0] = 0; 4760 nrows = 0; 4761 for (i=owners[proc]; i<owners[proc+1]; i++) { 4762 anzi = ai[i+1] - ai[i]; 4763 if (anzi) { 4764 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4765 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4766 nrows++; 4767 } 4768 } 4769 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4770 k++; 4771 buf_si += len_si[proc]; 4772 } 4773 4774 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4775 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4776 4777 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4778 for (i=0; i<merge->nrecv; i++) { 4779 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4780 } 4781 4782 ierr = PetscFree(len_si);CHKERRQ(ierr); 4783 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4784 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4785 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4786 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4787 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4788 ierr = PetscFree(status);CHKERRQ(ierr); 4789 4790 /* compute a local seq matrix in each processor */ 4791 /*----------------------------------------------*/ 4792 /* allocate bi array and free space for accumulating nonzero column info */ 4793 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4794 bi[0] = 0; 4795 4796 /* create and initialize a linked list */ 4797 nlnk = N+1; 4798 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4799 4800 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4801 len = ai[owners[rank+1]] - ai[owners[rank]]; 4802 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4803 4804 current_space = free_space; 4805 4806 /* determine symbolic info for each local row */ 4807 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4808 4809 for (k=0; k<merge->nrecv; k++) { 4810 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4811 nrows = *buf_ri_k[k]; 4812 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4813 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4814 } 4815 4816 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4817 len = 0; 4818 for (i=0; i<m; i++) { 4819 bnzi = 0; 4820 /* add local 
non-zero cols of this proc's seqmat into lnk */ 4821 arow = owners[rank] + i; 4822 anzi = ai[arow+1] - ai[arow]; 4823 aj = a->j + ai[arow]; 4824 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4825 bnzi += nlnk; 4826 /* add received col data into lnk */ 4827 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4828 if (i == *nextrow[k]) { /* i-th row */ 4829 anzi = *(nextai[k]+1) - *nextai[k]; 4830 aj = buf_rj[k] + *nextai[k]; 4831 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4832 bnzi += nlnk; 4833 nextrow[k]++; nextai[k]++; 4834 } 4835 } 4836 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4837 4838 /* if free space is not available, make more free space */ 4839 if (current_space->local_remaining<bnzi) { 4840 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 4841 nspacedouble++; 4842 } 4843 /* copy data into free space, then initialize lnk */ 4844 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4845 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4846 4847 current_space->array += bnzi; 4848 current_space->local_used += bnzi; 4849 current_space->local_remaining -= bnzi; 4850 4851 bi[i+1] = bi[i] + bnzi; 4852 } 4853 4854 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4855 4856 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4857 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4858 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4859 4860 /* create symbolic parallel matrix B_mpi */ 4861 /*---------------------------------------*/ 4862 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4863 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4864 if (n==PETSC_DECIDE) { 4865 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4866 } else { 4867 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4868 } 4869 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4870 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4871 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4872 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4873 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4874 4875 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4876 B_mpi->assembled = PETSC_FALSE; 4877 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4878 merge->bi = bi; 4879 merge->bj = bj; 4880 merge->buf_ri = buf_ri; 4881 merge->buf_rj = buf_rj; 4882 merge->coi = NULL; 4883 merge->coj = NULL; 4884 merge->owners_co = NULL; 4885 4886 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4887 4888 /* attach the supporting struct to B_mpi for reuse */ 4889 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4890 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4891 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4892 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4893 *mpimat = B_mpi; 4894 4895 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4896 PetscFunctionReturn(0); 4897 } 4898 4899 /*@C 4900 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4901 matrices from each processor 4902 4903 Collective 4904 4905 Input Parameters: 4906 + comm - the communicators the parallel matrix will live on 4907 . seqmat - the input sequential matrices 4908 . 
m - number of local rows (or PETSC_DECIDE)
.  n - number of local columns (or PETSC_DECIDE)
-  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.  mpimat - the parallel matrix generated

   Level: advanced

   Notes:
   The dimensions of the sequential matrix on each processor MUST be the same.
   The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
   destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
@*/
PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    if (scall == MAT_INITIAL_MATRIX) {
      ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
    } else {
      ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    }
    ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }
  ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
  }
  ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix
   with mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
   with MatGetSize()

   Not Collective

   Input Parameters:
+  A - the matrix
-  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.  A_loc - the local sequential matrix generated

   Level: developer

   Notes:
     When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
     If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
     This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
     modify the values of the returned A_loc.
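
     A typical calling sequence, sketched below, builds the local matrix once and then,
     after the entries of A have been modified, refreshes its values (error checking omitted):
.vb
       Mat A_loc = NULL;
       MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
       MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);
       MatDestroy(&A_loc);
.ve
     The second call reuses the structure of A_loc and only updates its numerical values.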
4969 4970 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed() 4971 4972 @*/ 4973 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4974 { 4975 PetscErrorCode ierr; 4976 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4977 Mat_SeqAIJ *mat,*a,*b; 4978 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4979 MatScalar *aa,*ba,*cam; 4980 PetscScalar *ca; 4981 PetscMPIInt size; 4982 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4983 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4984 PetscBool match; 4985 4986 PetscFunctionBegin; 4987 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr); 4988 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 4989 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 4990 if (size == 1) { 4991 if (scall == MAT_INITIAL_MATRIX) { 4992 ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr); 4993 *A_loc = mpimat->A; 4994 } else if (scall == MAT_REUSE_MATRIX) { 4995 ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4996 } 4997 PetscFunctionReturn(0); 4998 } 4999 5000 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5001 a = (Mat_SeqAIJ*)(mpimat->A)->data; 5002 b = (Mat_SeqAIJ*)(mpimat->B)->data; 5003 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 5004 aa = a->a; ba = b->a; 5005 if (scall == MAT_INITIAL_MATRIX) { 5006 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 5007 ci[0] = 0; 5008 for (i=0; i<am; i++) { 5009 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5010 } 5011 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 5012 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 5013 k = 0; 5014 for (i=0; i<am; i++) { 5015 ncols_o = bi[i+1] - bi[i]; 5016 ncols_d = ai[i+1] - ai[i]; 5017 /* off-diagonal portion of A */ 5018 for (jo=0; jo<ncols_o; jo++) { 5019 col = cmap[*bj]; 5020 if (col >= cstart) break; 5021 cj[k] = col; bj++; 5022 ca[k++] = *ba++; 5023 } 5024 /* diagonal portion of A */ 5025 for (j=0; j<ncols_d; j++) { 5026 cj[k] = cstart + *aj++; 5027 ca[k++] = *aa++; 5028 } 5029 /* off-diagonal portion of A */ 5030 for (j=jo; j<ncols_o; j++) { 5031 cj[k] = cmap[*bj++]; 5032 ca[k++] = *ba++; 5033 } 5034 } 5035 /* put together the new matrix */ 5036 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5037 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5038 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5039 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5040 mat->free_a = PETSC_TRUE; 5041 mat->free_ij = PETSC_TRUE; 5042 mat->nonew = 0; 5043 } else if (scall == MAT_REUSE_MATRIX) { 5044 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5045 ci = mat->i; cj = mat->j; cam = mat->a; 5046 for (i=0; i<am; i++) { 5047 /* off-diagonal portion of A */ 5048 ncols_o = bi[i+1] - bi[i]; 5049 for (jo=0; jo<ncols_o; jo++) { 5050 col = cmap[*bj]; 5051 if (col >= cstart) break; 5052 *cam++ = *ba++; bj++; 5053 } 5054 /* diagonal portion of A */ 5055 ncols_d = ai[i+1] - ai[i]; 5056 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5057 /* off-diagonal portion of A */ 5058 for (j=jo; j<ncols_o; j++) { 5059 *cam++ = *ba++; bj++; 5060 } 5061 } 5062 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5063 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5064 PetscFunctionReturn(0); 5065 } 5066 5067 /*@C 5068 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5069 5070 Not Collective 5071 5072 Input Parameters: 5073 + A - the matrix 5074 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5075 - row, col - index sets of rows and columns to extract (or NULL) 5076 5077 Output Parameter: 5078 . A_loc - the local sequential matrix generated 5079 5080 Level: developer 5081 5082 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5083 5084 @*/ 5085 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5086 { 5087 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5088 PetscErrorCode ierr; 5089 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5090 IS isrowa,iscola; 5091 Mat *aloc; 5092 PetscBool match; 5093 5094 PetscFunctionBegin; 5095 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5096 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5097 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5098 if (!row) { 5099 start = A->rmap->rstart; end = A->rmap->rend; 5100 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5101 } else { 5102 isrowa = *row; 5103 } 5104 if (!col) { 5105 start = A->cmap->rstart; 5106 cmap = a->garray; 5107 nzA = a->A->cmap->n; 5108 nzB = a->B->cmap->n; 5109 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5110 ncols = 0; 5111 for (i=0; i<nzB; i++) { 5112 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5113 else break; 5114 } 5115 imark = i; 5116 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5117 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5118 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5119 } else { 5120 iscola = *col; 5121 } 5122 if (scall != MAT_INITIAL_MATRIX) { 5123 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5124 aloc[0] = *A_loc; 5125 } 5126 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5127 if (!col) { /* attach global id of condensed columns */ 5128 ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr); 5129 } 5130 *A_loc = aloc[0]; 5131 ierr = PetscFree(aloc);CHKERRQ(ierr); 5132 if (!row) { 5133 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5134 } 5135 if (!col) { 5136 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5137 } 5138 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5139 PetscFunctionReturn(0); 5140 } 5141 5142 /* 5143 * Destroy a mat that may be 
composed with PetscSF communication objects. 5144 * The SF objects were created in MatCreateSeqSubMatrixWithRows_Private. 5145 * */ 5146 PetscErrorCode MatDestroy_SeqAIJ_PetscSF(Mat mat) 5147 { 5148 PetscSF sf,osf; 5149 IS map; 5150 PetscErrorCode ierr; 5151 5152 PetscFunctionBegin; 5153 ierr = PetscObjectQuery((PetscObject)mat,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr); 5154 ierr = PetscObjectQuery((PetscObject)mat,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr); 5155 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5156 ierr = PetscSFDestroy(&osf);CHKERRQ(ierr); 5157 ierr = PetscObjectQuery((PetscObject)mat,"aoffdiagtopothmapping",(PetscObject*)&map);CHKERRQ(ierr); 5158 ierr = ISDestroy(&map);CHKERRQ(ierr); 5159 ierr = MatDestroy_SeqAIJ(mat);CHKERRQ(ierr); 5160 PetscFunctionReturn(0); 5161 } 5162 5163 /* 5164 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5165 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5166 * on a global size. 5167 * */ 5168 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth) 5169 { 5170 Mat_MPIAIJ *p=(Mat_MPIAIJ*)P->data; 5171 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth; 5172 PetscInt plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol; 5173 PetscMPIInt owner; 5174 PetscSFNode *iremote,*oiremote; 5175 const PetscInt *lrowindices; 5176 PetscErrorCode ierr; 5177 PetscSF sf,osf; 5178 PetscInt pcstart,*roffsets,*loffsets,*pnnz,j; 5179 PetscInt ontotalcols,dntotalcols,ntotalcols,nout; 5180 MPI_Comm comm; 5181 ISLocalToGlobalMapping mapping; 5182 5183 PetscFunctionBegin; 5184 ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr); 5185 /* plocalsize is the number of roots 5186 * nrows is the number of leaves 5187 * */ 5188 ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr); 5189 ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr); 5190 ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr); 5191 ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr); 5192 for (i=0;i<nrows;i++) { 5193 /* Find a remote index and an owner for a row 5194 * The row could be local or remote 5195 * */ 5196 owner = 0; 5197 lidx = 0; 5198 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr); 5199 iremote[i].index = lidx; 5200 iremote[i].rank = owner; 5201 } 5202 /* Create SF to communicate how many nonzero columns for each row */ 5203 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5204 /* SF will figure out the number of nonzero colunms for each row, and their 5205 * offsets 5206 * */ 5207 ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5208 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5209 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5210 5211 ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr); 5212 ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr); 5213 ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr); 5214 roffsets[0] = 0; 5215 roffsets[1] = 0; 5216 for (i=0;i<plocalsize;i++) { 5217 /* diag */ 5218 nrcols[i*2+0] = pd->i[i+1] - pd->i[i]; 5219 /* off diag */ 5220 nrcols[i*2+1] = po->i[i+1] - po->i[i]; 5221 /* compute offsets so that we relative location for each row */ 5222 roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0]; 5223 roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1]; 5224 } 5225 ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr); 5226 ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr); 5227 /* 'r' 
means root, and 'l' means leaf */ 5228 ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr); 5229 ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr); 5230 ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr); 5231 ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr); 5232 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5233 ierr = PetscFree(roffsets);CHKERRQ(ierr); 5234 ierr = PetscFree(nrcols);CHKERRQ(ierr); 5235 dntotalcols = 0; 5236 ontotalcols = 0; 5237 ncol = 0; 5238 for (i=0;i<nrows;i++) { 5239 pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1]; 5240 ncol = PetscMax(pnnz[i],ncol); 5241 /* diag */ 5242 dntotalcols += nlcols[i*2+0]; 5243 /* off diag */ 5244 ontotalcols += nlcols[i*2+1]; 5245 } 5246 /* We do not need to figure the right number of columns 5247 * since all the calculations will be done by going through the raw data 5248 * */ 5249 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr); 5250 ierr = MatSetUp(*P_oth);CHKERRQ(ierr); 5251 ierr = PetscFree(pnnz);CHKERRQ(ierr); 5252 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5253 /* diag */ 5254 ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr); 5255 /* off diag */ 5256 ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr); 5257 /* diag */ 5258 ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr); 5259 /* off diag */ 5260 ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr); 5261 dntotalcols = 0; 5262 ontotalcols = 0; 5263 ntotalcols = 0; 5264 for (i=0;i<nrows;i++) { 5265 owner = 0; 5266 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr); 5267 /* Set iremote for diag matrix */ 5268 for (j=0;j<nlcols[i*2+0];j++) { 5269 iremote[dntotalcols].index = loffsets[i*2+0] + j; 5270 iremote[dntotalcols].rank = owner; 5271 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5272 ilocal[dntotalcols++] = ntotalcols++; 5273 } 5274 /* off diag */ 5275 for (j=0;j<nlcols[i*2+1];j++) { 5276 oiremote[ontotalcols].index = loffsets[i*2+1] + j; 5277 oiremote[ontotalcols].rank = owner; 5278 oilocal[ontotalcols++] = ntotalcols++; 5279 } 5280 } 5281 ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr); 5282 ierr = PetscFree(loffsets);CHKERRQ(ierr); 5283 ierr = PetscFree(nlcols);CHKERRQ(ierr); 5284 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5285 /* P serves as roots and P_oth is leaves 5286 * Diag matrix 5287 * */ 5288 ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5289 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5290 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5291 5292 ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr); 5293 /* Off diag */ 5294 ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5295 ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr); 5296 ierr = PetscSFSetUp(osf);CHKERRQ(ierr); 5297 /* We operate on the matrix internal data for saving memory */ 5298 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5299 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5300 ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr); 5301 /* Convert to global indices for diag matrix */ 5302 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart; 5303 ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr); 5304 /* We want P_oth store global indices */ 5305 ierr = 
ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr); 5306 /* Use memory scalable approach */ 5307 ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr); 5308 ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr); 5309 ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr); 5310 ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr); 5311 /* Convert back to local indices */ 5312 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart; 5313 ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr); 5314 nout = 0; 5315 ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr); 5316 if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal to nout %D \n",po->i[plocalsize],nout); 5317 ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr); 5318 /* Exchange values */ 5319 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5320 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5321 /* Stop PETSc from shrinking memory */ 5322 for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i]; 5323 ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5324 ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5325 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5326 ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr); 5327 ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr); 5328 /* ``New MatDestroy" takes care of PetscSF objects as well */ 5329 (*P_oth)->ops->destroy = MatDestroy_SeqAIJ_PetscSF; 5330 PetscFunctionReturn(0); 5331 } 5332 5333 /* 5334 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5335 * This supports MPIAIJ and MAIJ 5336 * */ 5337 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth) 5338 { 5339 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data; 5340 Mat_SeqAIJ *p_oth; 5341 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data; 5342 IS rows,map; 5343 PetscHMapI hamp; 5344 PetscInt i,htsize,*rowindices,off,*mapping,key,count; 5345 MPI_Comm comm; 5346 PetscSF sf,osf; 5347 PetscBool has; 5348 PetscErrorCode ierr; 5349 5350 PetscFunctionBegin; 5351 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5352 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5353 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5354 * and then create a submatrix (that often is an overlapping matrix) 5355 * */ 5356 if (reuse==MAT_INITIAL_MATRIX) { 5357 /* Use a hash table to figure out unique keys */ 5358 ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr); 5359 ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr); 5360 ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr); 5361 count = 0; 5362 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5363 for (i=0;i<a->B->cmap->n;i++) { 5364 key = a->garray[i]/dof; 5365 ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr); 5366 if (!has) { 5367 mapping[i] = count; 5368 ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr); 5369 } else { 5370 /* Current 'i' has the same value the previous step */ 5371 mapping[i] = count-1; 5372 } 5373 } 5374 ierr = 
ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr); 5375 ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr); 5376 if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count);CHKERRQ(ierr); 5377 ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr); 5378 off = 0; 5379 ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr); 5380 ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr); 5381 ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr); 5382 ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr); 5383 /* In case, the matrix was already created but users want to recreate the matrix */ 5384 ierr = MatDestroy(P_oth);CHKERRQ(ierr); 5385 ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr); 5386 ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr); 5387 ierr = ISDestroy(&rows);CHKERRQ(ierr); 5388 } else if (reuse==MAT_REUSE_MATRIX) { 5389 /* If matrix was already created, we simply update values using SF objects 5390 * that as attached to the matrix ealier. 5391 * */ 5392 ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr); 5393 ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr); 5394 if (!sf || !osf) { 5395 SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet \n"); 5396 } 5397 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5398 /* Update values in place */ 5399 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5400 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5401 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5402 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5403 } else { 5404 SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type \n"); 5405 } 5406 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5407 PetscFunctionReturn(0); 5408 } 5409 5410 /*@C 5411 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5412 5413 Collective on Mat 5414 5415 Input Parameters: 5416 + A,B - the matrices in mpiaij format 5417 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5418 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5419 5420 Output Parameter: 5421 + rowb, colb - index sets of rows and columns of B to extract 5422 - B_seq - the sequential matrix generated 5423 5424 Level: developer 5425 5426 @*/ 5427 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5428 { 5429 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5430 PetscErrorCode ierr; 5431 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5432 IS isrowb,iscolb; 5433 Mat *bseq=NULL; 5434 5435 PetscFunctionBegin; 5436 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5437 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5438 } 5439 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5440 5441 if (scall == MAT_INITIAL_MATRIX) { 5442 start = A->cmap->rstart; 5443 cmap = a->garray; 5444 nzA = a->A->cmap->n; 5445 nzB = a->B->cmap->n; 5446 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5447 ncols = 0; 5448 for (i=0; i<nzB; i++) { /* row < local row index */ 5449 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5450 else break; 5451 } 5452 imark = i; 5453 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5454 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5455 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5456 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5457 } else { 5458 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5459 isrowb = *rowb; iscolb = *colb; 5460 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5461 bseq[0] = *B_seq; 5462 } 5463 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5464 *B_seq = bseq[0]; 5465 ierr = PetscFree(bseq);CHKERRQ(ierr); 5466 if (!rowb) { 5467 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5468 } else { 5469 *rowb = isrowb; 5470 } 5471 if (!colb) { 5472 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5473 } else { 5474 *colb = iscolb; 5475 } 5476 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5477 PetscFunctionReturn(0); 5478 } 5479 5480 /* 5481 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5482 of the OFF-DIAGONAL portion of local A 5483 5484 Collective on Mat 5485 5486 Input Parameters: 5487 + A,B - the matrices in mpiaij format 5488 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5489 5490 Output Parameter: 5491 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5492 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5493 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5494 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5495 5496 Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product 5497 for this matrix. This is not desirable.. 
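
   A sketch of the intended calling pattern (as used, for example, by the parallel
   matrix-matrix product routines; error checking omitted):

      PetscInt  *startsj_s = NULL,*startsj_r = NULL;
      MatScalar *bufa = NULL;
      Mat        B_oth = NULL;
      MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);
      MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);

   The second call reuses the communication information saved in startsj_s, startsj_r and bufa
   to update only the numerical values of B_oth.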
5498 5499 Level: developer 5500 5501 */ 5502 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5503 { 5504 PetscErrorCode ierr; 5505 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5506 Mat_SeqAIJ *b_oth; 5507 VecScatter ctx; 5508 MPI_Comm comm; 5509 const PetscMPIInt *rprocs,*sprocs; 5510 const PetscInt *srow,*rstarts,*sstarts; 5511 PetscInt *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5512 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = 0,*sstartsj,len; 5513 PetscScalar *b_otha,*bufa,*bufA,*vals = NULL; 5514 MPI_Request *rwaits = NULL,*swaits = NULL; 5515 MPI_Status rstatus; 5516 PetscMPIInt jj,size,tag,rank,nsends_mpi,nrecvs_mpi; 5517 5518 PetscFunctionBegin; 5519 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5520 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5521 5522 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5523 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5524 } 5525 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5526 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5527 5528 if (size == 1) { 5529 startsj_s = NULL; 5530 bufa_ptr = NULL; 5531 *B_oth = NULL; 5532 PetscFunctionReturn(0); 5533 } 5534 5535 ctx = a->Mvctx; 5536 tag = ((PetscObject)ctx)->tag; 5537 5538 if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use"); 5539 ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5540 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5541 ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr); 5542 ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr); 5543 ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr); 5544 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5545 5546 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5547 if (scall == MAT_INITIAL_MATRIX) { 5548 /* i-array */ 5549 /*---------*/ 5550 /* post receives */ 5551 if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */ 5552 for (i=0; i<nrecvs; i++) { 5553 rowlen = rvalues + rstarts[i]*rbs; 5554 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5555 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5556 } 5557 5558 /* pack the outgoing message */ 5559 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5560 5561 sstartsj[0] = 0; 5562 rstartsj[0] = 0; 5563 len = 0; /* total length of j or a array to be sent */ 5564 if (nsends) { 5565 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5566 ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr); 5567 } 5568 for (i=0; i<nsends; i++) { 5569 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5570 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5571 for (j=0; j<nrows; j++) { 5572 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5573 for (l=0; l<sbs; l++) { 5574 ierr = 
MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
          rowlen[j*sbs+l] = ncols;
          len += ncols;
          ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
        }
        k++;
      }
      ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);

      sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
    ierr = PetscFree(svalues);CHKERRQ(ierr);

    /* allocate buffers for sending j and a arrays */
    ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);

    /* create i-array of B_oth */
    ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);

    b_othi[0] = 0;
    len       = 0; /* total length of j or a array to be received */
    k         = 0;
    for (i=0; i<nrecvs; i++) {
      rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
      nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
        k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }
    ierr = PetscFree(rvalues);CHKERRQ(ierr);

    /* allocate space for j and a arrays of B_oth */
    ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);

    /* j-array */
    /*---------*/
    /* post receives of j-array */
    for (i=0; i<nrecvs; i++) {
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message j-array */
    if (nsends) k = sstarts[0];
    for (i=0; i<nsends; i++) {
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row = srow[k++] + B->rmap->range[rank]; /* global row idx */
        for (ll=0; ll<sbs; ll++) {
          ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
          for (l=0; l<ncols; l++) {
            *bufJ++ = cols[l];
          }
          ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
        }
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
  } else if (scall == MAT_REUSE_MATRIX) {
    sstartsj = *startsj_s;
    rstartsj = *startsj_r;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Only MAT_INITIAL_MATRIX and MAT_REUSE_MATRIX are supported");

  /* a-array */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++) {
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }

  /* pack the outgoing message a-array */
  if (nsends) k = sstarts[0];
  for (i=0; i<nsends; i++) {
    nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row = srow[k++] + B->rmap->range[rank]; /* global row idx */
      for (ll=0; ll<sbs; ll++) {
        ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
        for (l=0; l<ncols; l++) {
          *bufA++ = vals[l];
        }
        ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
      }
    }
    ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
  }
  /* recvs and sends of a-array are completed */
  i = nrecvs;
  while (i--) {
    ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
  }
  if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
  ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX) {
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
    b_oth->free_a  = PETSC_TRUE;
    b_oth->free_ij = PETSC_TRUE;
    b_oth->nonew   = 0;

    ierr = PetscFree(bufj);CHKERRQ(ierr);
    if (!startsj_s || !bufa_ptr) {
      ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
    } else {
      *startsj_s = sstartsj;
      *startsj_r = rstartsj;
      *bufa_ptr  = bufa;
    }
  }

  ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
  ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.

   Not Collective

   Input Parameter:
.  A - The matrix in mpiaij format

   Output Parameters:
+  lvec - The local vector holding off-process values from the argument to a matrix-vector product
.  colmap - A map from global column index to local index into lvec
-  multScatter - A scatter from the argument of a matrix-vector product to lvec

   Level: developer

@*/
#if defined(PETSC_USE_CTABLE)
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
#else
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
#endif
{
  Mat_MPIAIJ *a;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidPointer(lvec, 2);
  PetscValidPointer(colmap, 3);
  PetscValidPointer(multScatter, 4);
  a = (Mat_MPIAIJ*) A->data;
  if (lvec) *lvec = a->lvec;
  if (colmap) *colmap = a->colmap;
  if (multScatter) *multScatter = a->Mvctx;
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
#if defined(PETSC_HAVE_MKL_SPARSE)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
#endif
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
#endif
#if defined(PETSC_HAVE_HYPRE)
PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
#endif
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);

/*
    Computes (B'*A')' since computing A*B directly is untenable

               n                       p                          p
        (                )       (              )          (              )
      m (       A        )   *  n (      B      )    =   m (       C      )
        (                )       (              )          (              )

*/
PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat            At,Bt,Ct;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
  ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
  ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
  ierr = MatDestroy(&At);CHKERRQ(ierr);
  ierr = MatDestroy(&Bt);CHKERRQ(ierr);
  ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(&Ct);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscErrorCode ierr;
  PetscInt       m=A->rmap->n,n=B->cmap->n;

  PetscFunctionBegin;
  if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
  ierr = MatSetSizes(C,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr);
  ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(C,NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
  PetscFunctionReturn(0);
}
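/*
   Usage sketch (illustrative only, not part of the library): the symbolic/numeric pair above is the
   kind of routine a MatMatMult() with a MATMPIDENSE A and a MATMPIAIJ B is expected to reach through
   the MatProduct mechanism set up below.  The matrices A and B are assumed to be already created,
   assembled, and compatibly sized; only C is new here.

     Mat C;
     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);    first call: symbolic + numeric phases
     ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);      later calls reuse the symbolic data when only values change
     ierr = MatDestroy(&C);CHKERRQ(ierr);
*/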

/* ----------------------------------------------------------------*/
static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
{
  Mat_Product *product = C->product;
  Mat         A = product->A,B = product->B;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);

  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
{
  PetscErrorCode ierr;
  Mat_Product    *product = C->product;

  PetscFunctionBegin;
  ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
  if (product->type == MATPRODUCT_AB) {
    ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr);
  } else SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for MPIDense and MPIAIJ matrices",MatProductTypes[product->type]);
  PetscFunctionReturn(0);
}
/* ----------------------------------------------------------------*/

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

   Level: beginner

   Notes:
   MatSetValues() may be called for this matrix type with a NULL argument for the numerical values;
   in this case the values associated with the rows and columns one passes in are set to zero
   in the matrix.

   MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
   space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored.

.seealso: MatCreateAIJ()
M*/

PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);

  ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSP/CUSPARSE classes */
  b->spptr = NULL;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_MKL_SPARSE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
   and "off-diagonal" part of the matrix in CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
       it calculated if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.  j - column indices
.  a - matrix values
.  oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
.  oj - column indices
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
   The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
   must free the arrays once the matrix has been destroyed and not before.

   The i and j indices are 0 based.

   See MatCreateAIJ() for the definition of the "diagonal" and "off-diagonal" portions of the matrix.

   This sets local rows and cannot be used to set off-processor values.

   Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
   legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
   not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
   the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
   keep track of the underlying arrays. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
   communication if it is known that only local entries will be set.
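
   Example usage (an illustrative sketch, not taken from the PETSc test suite): two MPI ranks assemble the
   4x4 matrix with rows (1 0 2 0), (0 3 0 4), (5 0 6 0), (0 7 0 8), where rank 0 owns rows and columns 0-1
   and rank 1 owns rows and columns 2-3. The "diagonal" j indices are local to the owned columns, while the
   "off-diagonal" oj indices are global; the array names below are made up for the example.

.vb
   Mat         A;
   PetscMPIInt rank;
   PetscInt    di[]  = {0,1,2}, dj0[] = {0,1}, oi[]  = {0,1,2}, oj0[] = {2,3}, dj1[] = {0,1}, oj1[] = {0,1};
   PetscScalar da0[] = {1,3},   oa0[] = {2,4}, da1[] = {6,8},   oa1[] = {5,7};

   MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
   if (!rank) MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,4,4,di,dj0,da0,oi,oj0,oa0,&A);
   else       MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,4,4,di,dj1,da1,oi,oj1,oa1,&A);
.ve

   The arrays must remain valid until the matrix has been destroyed.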

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Special version for direct calls from Fortran
*/
#include <petsc/private/fortranimpl.h>

/* Change these macros so they can be used in a void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ3
#define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)

#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  {
    PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat        A     = aij->A;
    Mat_SeqAIJ *a    = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa   = a->a;
    PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B     = aij->B;
    Mat_SeqAIJ *b    = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba   = b->a;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
      if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
            if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
          } else if (in[j] < 0) continue;
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
            SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
          } else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined(PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
                ba    = b->a;
                inserted = PETSC_FALSE;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
            if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
          }
        }
      } else if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}
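
/*
   Illustrative sketch (not part of the library): the Fortran binding above follows the same insertion
   path an application reaches through MatSetValues() on a MATMPIAIJ matrix; values destined for rows
   owned by other processes are stashed and exchanged during assembly.  The matrix A and the global
   indices grow/gcol below are assumptions for the example.

     PetscInt    rows[1] = {grow},cols[2] = {gcol,gcol+1};
     PetscScalar vals[2] = {1.0,2.0};

     ierr = MatSetValues(A,1,rows,2,cols,vals,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/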