

#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/vecscatterimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
  MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
  for communicators controlling multiple processes.  It is recommended that you call both of
  the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

  Developer Notes:
    Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when
   enough exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
.
-mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 42 43 Level: beginner 44 45 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 46 M*/ 47 48 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 49 { 50 PetscErrorCode ierr; 51 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 52 53 PetscFunctionBegin; 54 if (mat->A) { 55 ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr); 56 ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr); 57 } 58 PetscFunctionReturn(0); 59 } 60 61 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 62 { 63 PetscErrorCode ierr; 64 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 65 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 66 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 67 const PetscInt *ia,*ib; 68 const MatScalar *aa,*bb; 69 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 70 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 71 72 PetscFunctionBegin; 73 *keptrows = 0; 74 ia = a->i; 75 ib = b->i; 76 for (i=0; i<m; i++) { 77 na = ia[i+1] - ia[i]; 78 nb = ib[i+1] - ib[i]; 79 if (!na && !nb) { 80 cnt++; 81 goto ok1; 82 } 83 aa = a->a + ia[i]; 84 for (j=0; j<na; j++) { 85 if (aa[j] != 0.0) goto ok1; 86 } 87 bb = b->a + ib[i]; 88 for (j=0; j <nb; j++) { 89 if (bb[j] != 0.0) goto ok1; 90 } 91 cnt++; 92 ok1:; 93 } 94 ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 95 if (!n0rows) PetscFunctionReturn(0); 96 ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr); 97 cnt = 0; 98 for (i=0; i<m; i++) { 99 na = ia[i+1] - ia[i]; 100 nb = ib[i+1] - ib[i]; 101 if (!na && !nb) continue; 102 aa = a->a + ia[i]; 103 for (j=0; j<na;j++) { 104 if (aa[j] != 0.0) { 105 rows[cnt++] = rstart + i; 106 goto ok2; 107 } 108 } 109 bb = b->a + ib[i]; 110 for (j=0; j<nb; j++) { 111 if (bb[j] != 0.0) { 112 rows[cnt++] = rstart + i; 113 goto ok2; 114 } 115 } 116 ok2:; 117 } 118 ierr = 
ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 119 PetscFunctionReturn(0); 120 } 121 122 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 123 { 124 PetscErrorCode ierr; 125 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 126 PetscBool cong; 127 128 PetscFunctionBegin; 129 ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr); 130 if (Y->assembled && cong) { 131 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 132 } else { 133 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 134 } 135 PetscFunctionReturn(0); 136 } 137 138 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 139 { 140 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 141 PetscErrorCode ierr; 142 PetscInt i,rstart,nrows,*rows; 143 144 PetscFunctionBegin; 145 *zrows = NULL; 146 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 147 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 148 for (i=0; i<nrows; i++) rows[i] += rstart; 149 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 150 PetscFunctionReturn(0); 151 } 152 153 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 154 { 155 PetscErrorCode ierr; 156 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 157 PetscInt i,n,*garray = aij->garray; 158 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 159 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 160 PetscReal *work; 161 162 PetscFunctionBegin; 163 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 164 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 165 if (type == NORM_2) { 166 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 167 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 168 } 169 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 170 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 171 } 172 } else if (type == NORM_1) { 173 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 174 work[A->cmap->rstart 
+ a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 175 } 176 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 177 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 178 } 179 } else if (type == NORM_INFINITY) { 180 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 181 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 182 } 183 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 184 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 185 } 186 187 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 188 if (type == NORM_INFINITY) { 189 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 190 } else { 191 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 192 } 193 ierr = PetscFree(work);CHKERRQ(ierr); 194 if (type == NORM_2) { 195 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 196 } 197 PetscFunctionReturn(0); 198 } 199 200 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 201 { 202 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 203 IS sis,gis; 204 PetscErrorCode ierr; 205 const PetscInt *isis,*igis; 206 PetscInt n,*iis,nsis,ngis,rstart,i; 207 208 PetscFunctionBegin; 209 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 210 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 211 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 212 ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr); 213 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 214 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 215 216 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 217 ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr); 218 ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr); 219 n = ngis + nsis; 220 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 221 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 222 for (i=0; i<n; i++) iis[i] += 
rstart; 223 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 224 225 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 226 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 227 ierr = ISDestroy(&sis);CHKERRQ(ierr); 228 ierr = ISDestroy(&gis);CHKERRQ(ierr); 229 PetscFunctionReturn(0); 230 } 231 232 /* 233 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 234 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 235 236 Only for square matrices 237 238 Used by a preconditioner, hence PETSC_EXTERN 239 */ 240 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 241 { 242 PetscMPIInt rank,size; 243 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 244 PetscErrorCode ierr; 245 Mat mat; 246 Mat_SeqAIJ *gmata; 247 PetscMPIInt tag; 248 MPI_Status status; 249 PetscBool aij; 250 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 251 252 PetscFunctionBegin; 253 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 254 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 255 if (!rank) { 256 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 257 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 258 } 259 if (reuse == MAT_INITIAL_MATRIX) { 260 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 261 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 262 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 263 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 264 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 265 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 266 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 267 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 268 ierr = 
MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 269 270 rowners[0] = 0; 271 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 272 rstart = rowners[rank]; 273 rend = rowners[rank+1]; 274 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 275 if (!rank) { 276 gmata = (Mat_SeqAIJ*) gmat->data; 277 /* send row lengths to all processors */ 278 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 279 for (i=1; i<size; i++) { 280 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 281 } 282 /* determine number diagonal and off-diagonal counts */ 283 ierr = PetscArrayzero(olens,m);CHKERRQ(ierr); 284 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 285 jj = 0; 286 for (i=0; i<m; i++) { 287 for (j=0; j<dlens[i]; j++) { 288 if (gmata->j[jj] < rstart) ld[i]++; 289 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 290 jj++; 291 } 292 } 293 /* send column indices to other processes */ 294 for (i=1; i<size; i++) { 295 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 296 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 297 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 298 } 299 300 /* send numerical values to other processes */ 301 for (i=1; i<size; i++) { 302 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 303 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 304 } 305 gmataa = gmata->a; 306 gmataj = gmata->j; 307 308 } else { 309 /* receive row lengths */ 310 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 311 /* receive column indices */ 312 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 313 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 314 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 315 /* determine number diagonal and off-diagonal counts */ 316 ierr = PetscArrayzero(olens,m);CHKERRQ(ierr); 317 ierr = 
PetscCalloc1(m,&ld);CHKERRQ(ierr); 318 jj = 0; 319 for (i=0; i<m; i++) { 320 for (j=0; j<dlens[i]; j++) { 321 if (gmataj[jj] < rstart) ld[i]++; 322 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 323 jj++; 324 } 325 } 326 /* receive numerical values */ 327 ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr); 328 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 329 } 330 /* set preallocation */ 331 for (i=0; i<m; i++) { 332 dlens[i] -= olens[i]; 333 } 334 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 335 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 336 337 for (i=0; i<m; i++) { 338 dlens[i] += olens[i]; 339 } 340 cnt = 0; 341 for (i=0; i<m; i++) { 342 row = rstart + i; 343 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 344 cnt += dlens[i]; 345 } 346 if (rank) { 347 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 348 } 349 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 350 ierr = PetscFree(rowners);CHKERRQ(ierr); 351 352 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 353 354 *inmat = mat; 355 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 356 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 357 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 358 mat = *inmat; 359 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 360 if (!rank) { 361 /* send numerical values to other processes */ 362 gmata = (Mat_SeqAIJ*) gmat->data; 363 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 364 gmataa = gmata->a; 365 for (i=1; i<size; i++) { 366 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 367 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 368 } 369 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 370 } else { 371 /* receive numerical values from process 0*/ 372 nz = Ad->nz + Ao->nz; 373 ierr = 
PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 374 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 375 } 376 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 377 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 378 ad = Ad->a; 379 ao = Ao->a; 380 if (mat->rmap->n) { 381 i = 0; 382 nz = ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz; 383 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz; 384 } 385 for (i=1; i<mat->rmap->n; i++) { 386 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz; 387 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz; 388 } 389 i--; 390 if (mat->rmap->n) { 391 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); 392 } 393 if (rank) { 394 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 395 } 396 } 397 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 398 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 399 PetscFunctionReturn(0); 400 } 401 402 /* 403 Local utility routine that creates a mapping from the global column 404 number to the local number in the off-diagonal part of the local 405 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 406 a slightly higher hash table cost; without it it is not scalable (each processor 407 has an order N integer array but is fast to acess. 
408 */ 409 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 410 { 411 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 412 PetscErrorCode ierr; 413 PetscInt n = aij->B->cmap->n,i; 414 415 PetscFunctionBegin; 416 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 417 #if defined(PETSC_USE_CTABLE) 418 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 419 for (i=0; i<n; i++) { 420 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 421 } 422 #else 423 ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 424 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 425 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 426 #endif 427 PetscFunctionReturn(0); 428 } 429 430 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 431 { \ 432 if (col <= lastcol1) low1 = 0; \ 433 else high1 = nrow1; \ 434 lastcol1 = col;\ 435 while (high1-low1 > 5) { \ 436 t = (low1+high1)/2; \ 437 if (rp1[t] > col) high1 = t; \ 438 else low1 = t; \ 439 } \ 440 for (_i=low1; _i<high1; _i++) { \ 441 if (rp1[_i] > col) break; \ 442 if (rp1[_i] == col) { \ 443 if (addv == ADD_VALUES) { \ 444 ap1[_i] += value; \ 445 /* Not sure LogFlops will slow dow the code or not */ \ 446 (void)PetscLogFlops(1.0); \ 447 } \ 448 else ap1[_i] = value; \ 449 goto a_noinsert; \ 450 } \ 451 } \ 452 if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 453 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 454 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 455 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 456 N = nrow1++ - 1; a->nz++; high1++; \ 457 /* shift up all the later entries in this row */ \ 458 ierr = 
PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\ 459 ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\ 460 rp1[_i] = col; \ 461 ap1[_i] = value; \ 462 A->nonzerostate++;\ 463 a_noinsert: ; \ 464 ailen[row] = nrow1; \ 465 } 466 467 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 468 { \ 469 if (col <= lastcol2) low2 = 0; \ 470 else high2 = nrow2; \ 471 lastcol2 = col; \ 472 while (high2-low2 > 5) { \ 473 t = (low2+high2)/2; \ 474 if (rp2[t] > col) high2 = t; \ 475 else low2 = t; \ 476 } \ 477 for (_i=low2; _i<high2; _i++) { \ 478 if (rp2[_i] > col) break; \ 479 if (rp2[_i] == col) { \ 480 if (addv == ADD_VALUES) { \ 481 ap2[_i] += value; \ 482 (void)PetscLogFlops(1.0); \ 483 } \ 484 else ap2[_i] = value; \ 485 goto b_noinsert; \ 486 } \ 487 } \ 488 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 489 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 490 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 491 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 492 N = nrow2++ - 1; b->nz++; high2++; \ 493 /* shift up all the later entries in this row */ \ 494 ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\ 495 ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\ 496 rp2[_i] = col; \ 497 ap2[_i] = value; \ 498 B->nonzerostate++; \ 499 b_noinsert: ; \ 500 bilen[row] = nrow2; \ 501 } 502 503 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 504 { 505 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 506 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 507 PetscErrorCode ierr; 508 PetscInt l,*garray = mat->garray,diag; 509 510 PetscFunctionBegin; 511 /* code only works for square matrices A */ 512 513 /* find size of row to the left of the diagonal part */ 514 ierr = 
MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 515 row = row - diag; 516 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 517 if (garray[b->j[b->i[row]+l]] > diag) break; 518 } 519 ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr); 520 521 /* diagonal part */ 522 ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr); 523 524 /* right of diagonal part */ 525 ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr); 526 PetscFunctionReturn(0); 527 } 528 529 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 530 { 531 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 532 PetscScalar value; 533 PetscErrorCode ierr; 534 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 535 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 536 PetscBool roworiented = aij->roworiented; 537 538 /* Some Variables required in the macro */ 539 Mat A = aij->A; 540 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 541 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 542 MatScalar *aa = a->a; 543 PetscBool ignorezeroentries = a->ignorezeroentries; 544 Mat B = aij->B; 545 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 546 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 547 MatScalar *ba = b->a; 548 549 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 550 PetscInt nonew; 551 MatScalar *ap1,*ap2; 552 553 PetscFunctionBegin; 554 for (i=0; i<m; i++) { 555 if (im[i] < 0) continue; 556 #if defined(PETSC_USE_DEBUG) 557 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 558 #endif 559 if (im[i] >= rstart && im[i] < rend) { 560 row = im[i] - rstart; 561 lastcol1 = -1; 562 rp1 = aj + ai[row]; 563 ap1 = aa + ai[row]; 564 rmax1 = aimax[row]; 565 
nrow1 = ailen[row]; 566 low1 = 0; 567 high1 = nrow1; 568 lastcol2 = -1; 569 rp2 = bj + bi[row]; 570 ap2 = ba + bi[row]; 571 rmax2 = bimax[row]; 572 nrow2 = bilen[row]; 573 low2 = 0; 574 high2 = nrow2; 575 576 for (j=0; j<n; j++) { 577 if (roworiented) value = v[i*n+j]; 578 else value = v[i+j*m]; 579 if (in[j] >= cstart && in[j] < cend) { 580 col = in[j] - cstart; 581 nonew = a->nonew; 582 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 583 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 584 } else if (in[j] < 0) continue; 585 #if defined(PETSC_USE_DEBUG) 586 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 587 #endif 588 else { 589 if (mat->was_assembled) { 590 if (!aij->colmap) { 591 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 592 } 593 #if defined(PETSC_USE_CTABLE) 594 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 595 col--; 596 #else 597 col = aij->colmap[in[j]] - 1; 598 #endif 599 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 600 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 601 col = in[j]; 602 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 603 B = aij->B; 604 b = (Mat_SeqAIJ*)B->data; 605 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 606 rp2 = bj + bi[row]; 607 ap2 = ba + bi[row]; 608 rmax2 = bimax[row]; 609 nrow2 = bilen[row]; 610 low2 = 0; 611 high2 = nrow2; 612 bm = aij->B->rmap->n; 613 ba = b->a; 614 } else if (col < 0) { 615 if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) { 616 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr); 617 } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 618 } 619 } else col = in[j]; 
620 nonew = b->nonew; 621 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 622 } 623 } 624 } else { 625 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 626 if (!aij->donotstash) { 627 mat->assembled = PETSC_FALSE; 628 if (roworiented) { 629 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 630 } else { 631 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 632 } 633 } 634 } 635 } 636 PetscFunctionReturn(0); 637 } 638 639 /* 640 This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 641 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 642 No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. 
643 */ 644 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[]) 645 { 646 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 647 Mat A = aij->A; /* diagonal part of the matrix */ 648 Mat B = aij->B; /* offdiagonal part of the matrix */ 649 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 650 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 651 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,col; 652 PetscInt *ailen = a->ilen,*aj = a->j; 653 PetscInt *bilen = b->ilen,*bj = b->j; 654 PetscInt am = aij->A->rmap->n,j; 655 PetscInt diag_so_far = 0,dnz; 656 PetscInt offd_so_far = 0,onz; 657 658 PetscFunctionBegin; 659 /* Iterate over all rows of the matrix */ 660 for (j=0; j<am; j++) { 661 dnz = onz = 0; 662 /* Iterate over all non-zero columns of the current row */ 663 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 664 /* If column is in the diagonal */ 665 if (mat_j[col] >= cstart && mat_j[col] < cend) { 666 aj[diag_so_far++] = mat_j[col] - cstart; 667 dnz++; 668 } else { /* off-diagonal entries */ 669 bj[offd_so_far++] = mat_j[col]; 670 onz++; 671 } 672 } 673 ailen[j] = dnz; 674 bilen[j] = onz; 675 } 676 PetscFunctionReturn(0); 677 } 678 679 /* 680 This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 681 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 682 No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ. 683 Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 684 would not be true and the more complex MatSetValues_MPIAIJ has to be used. 
685 */ 686 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[]) 687 { 688 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 689 Mat A = aij->A; /* diagonal part of the matrix */ 690 Mat B = aij->B; /* offdiagonal part of the matrix */ 691 Mat_SeqAIJ *aijd =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data; 692 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 693 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 694 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend; 695 PetscInt *ailen = a->ilen,*aj = a->j; 696 PetscInt *bilen = b->ilen,*bj = b->j; 697 PetscInt am = aij->A->rmap->n,j; 698 PetscInt *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */ 699 PetscInt col,dnz_row,onz_row,rowstart_diag,rowstart_offd; 700 PetscScalar *aa = a->a,*ba = b->a; 701 702 PetscFunctionBegin; 703 /* Iterate over all rows of the matrix */ 704 for (j=0; j<am; j++) { 705 dnz_row = onz_row = 0; 706 rowstart_offd = full_offd_i[j]; 707 rowstart_diag = full_diag_i[j]; 708 /* Iterate over all non-zero columns of the current row */ 709 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 710 /* If column is in the diagonal */ 711 if (mat_j[col] >= cstart && mat_j[col] < cend) { 712 aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 713 aa[rowstart_diag+dnz_row] = mat_a[col]; 714 dnz_row++; 715 } else { /* off-diagonal entries */ 716 bj[rowstart_offd+onz_row] = mat_j[col]; 717 ba[rowstart_offd+onz_row] = mat_a[col]; 718 onz_row++; 719 } 720 } 721 ailen[j] = dnz_row; 722 bilen[j] = onz_row; 723 } 724 PetscFunctionReturn(0); 725 } 726 727 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 728 { 729 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 730 PetscErrorCode ierr; 731 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 732 PetscInt cstart = mat->cmap->rstart,cend = 
mat->cmap->rend,row,col; 733 734 PetscFunctionBegin; 735 for (i=0; i<m; i++) { 736 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 737 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 738 if (idxm[i] >= rstart && idxm[i] < rend) { 739 row = idxm[i] - rstart; 740 for (j=0; j<n; j++) { 741 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 742 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 743 if (idxn[j] >= cstart && idxn[j] < cend) { 744 col = idxn[j] - cstart; 745 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 746 } else { 747 if (!aij->colmap) { 748 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 749 } 750 #if defined(PETSC_USE_CTABLE) 751 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 752 col--; 753 #else 754 col = aij->colmap[idxn[j]] - 1; 755 #endif 756 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 757 else { 758 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 759 } 760 } 761 } 762 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 763 } 764 PetscFunctionReturn(0); 765 } 766 767 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 768 769 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 770 { 771 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 772 PetscErrorCode ierr; 773 PetscInt nstash,reallocs; 774 775 PetscFunctionBegin; 776 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 777 778 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 779 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 780 ierr = PetscInfo2(aij->A,"Stash has %D entries, 
uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 781 PetscFunctionReturn(0); 782 } 783 784 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 785 { 786 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 787 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 788 PetscErrorCode ierr; 789 PetscMPIInt n; 790 PetscInt i,j,rstart,ncols,flg; 791 PetscInt *row,*col; 792 PetscBool other_disassembled; 793 PetscScalar *val; 794 795 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 796 797 PetscFunctionBegin; 798 if (!aij->donotstash && !mat->nooffprocentries) { 799 while (1) { 800 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 801 if (!flg) break; 802 803 for (i=0; i<n; ) { 804 /* Now identify the consecutive vals belonging to the same row */ 805 for (j=i,rstart=row[j]; j<n; j++) { 806 if (row[j] != rstart) break; 807 } 808 if (j < n) ncols = j-i; 809 else ncols = n-i; 810 /* Now assemble all these values with a single function call */ 811 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 812 813 i = j; 814 } 815 } 816 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 817 } 818 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 819 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 820 821 /* determine if any processor has disassembled, if so we must 822 also disassemble ourselfs, in order that we may reassemble. 
*/ 823 /* 824 if nonzero structure of submatrix B cannot change then we know that 825 no processor disassembled thus we can skip this stuff 826 */ 827 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 828 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 829 if (mat->was_assembled && !other_disassembled) { 830 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 831 } 832 } 833 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 834 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 835 } 836 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 837 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 838 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 839 840 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 841 842 aij->rowvalues = 0; 843 844 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 845 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 846 847 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 848 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 849 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 850 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 851 } 852 PetscFunctionReturn(0); 853 } 854 855 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 856 { 857 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 858 PetscErrorCode ierr; 859 860 PetscFunctionBegin; 861 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 862 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 863 PetscFunctionReturn(0); 864 } 865 866 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 867 { 868 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 869 PetscObjectState sA, sB; 870 PetscInt *lrows; 871 PetscInt r, len; 872 PetscBool cong, lch, gch; 873 
PetscErrorCode ierr; 874 875 PetscFunctionBegin; 876 /* get locally owned rows */ 877 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 878 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 879 /* fix right hand side if needed */ 880 if (x && b) { 881 const PetscScalar *xx; 882 PetscScalar *bb; 883 884 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 885 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 886 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 887 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 888 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 889 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 890 } 891 892 sA = mat->A->nonzerostate; 893 sB = mat->B->nonzerostate; 894 895 if (diag != 0.0 && cong) { 896 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 897 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 898 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 899 Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data; 900 Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data; 901 PetscInt nnwA, nnwB; 902 PetscBool nnzA, nnzB; 903 904 nnwA = aijA->nonew; 905 nnwB = aijB->nonew; 906 nnzA = aijA->keepnonzeropattern; 907 nnzB = aijB->keepnonzeropattern; 908 if (!nnzA) { 909 ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr); 910 aijA->nonew = 0; 911 } 912 if (!nnzB) { 913 ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr); 914 aijB->nonew = 0; 915 } 916 /* Must zero here before the next loop */ 917 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 918 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 919 for (r = 0; r < len; ++r) { 920 
const PetscInt row = lrows[r] + A->rmap->rstart; 921 if (row >= A->cmap->N) continue; 922 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 923 } 924 aijA->nonew = nnwA; 925 aijB->nonew = nnwB; 926 } else { 927 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 928 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 929 } 930 ierr = PetscFree(lrows);CHKERRQ(ierr); 931 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 932 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 933 934 /* reduce nonzerostate */ 935 lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate); 936 ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 937 if (gch) A->nonzerostate++; 938 PetscFunctionReturn(0); 939 } 940 941 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 942 { 943 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 944 PetscErrorCode ierr; 945 PetscMPIInt n = A->rmap->n; 946 PetscInt i,j,r,m,p = 0,len = 0; 947 PetscInt *lrows,*owners = A->rmap->range; 948 PetscSFNode *rrows; 949 PetscSF sf; 950 const PetscScalar *xx; 951 PetscScalar *bb,*mask; 952 Vec xmask,lmask; 953 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 954 const PetscInt *aj, *ii,*ridx; 955 PetscScalar *aa; 956 957 PetscFunctionBegin; 958 /* Create SF where leaves are input rows and roots are owned rows */ 959 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 960 for (r = 0; r < n; ++r) lrows[r] = -1; 961 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 962 for (r = 0; r < N; ++r) { 963 const PetscInt idx = rows[r]; 964 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 965 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 966 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 967 } 968 
rrows[r].rank = p; 969 rrows[r].index = rows[r] - owners[p]; 970 } 971 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 972 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 973 /* Collect flags for rows to be zeroed */ 974 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 975 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 976 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 977 /* Compress and put in row numbers */ 978 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 979 /* zero diagonal part of matrix */ 980 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 981 /* handle off diagonal part of matrix */ 982 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 983 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 984 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 985 for (i=0; i<len; i++) bb[lrows[i]] = 1; 986 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 987 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 988 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 989 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 990 if (x && b) { /* this code is buggy when the row and column layout don't match */ 991 PetscBool cong; 992 993 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 994 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 995 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 996 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 997 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 998 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 999 } 1000 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1001 /* remove zeroed rows of off diagonal matrix */ 1002 ii = aij->i; 1003 for (i=0; i<len; i++) { 1004 ierr = 
PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr); 1005 } 1006 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1007 if (aij->compressedrow.use) { 1008 m = aij->compressedrow.nrows; 1009 ii = aij->compressedrow.i; 1010 ridx = aij->compressedrow.rindex; 1011 for (i=0; i<m; i++) { 1012 n = ii[i+1] - ii[i]; 1013 aj = aij->j + ii[i]; 1014 aa = aij->a + ii[i]; 1015 1016 for (j=0; j<n; j++) { 1017 if (PetscAbsScalar(mask[*aj])) { 1018 if (b) bb[*ridx] -= *aa*xx[*aj]; 1019 *aa = 0.0; 1020 } 1021 aa++; 1022 aj++; 1023 } 1024 ridx++; 1025 } 1026 } else { /* do not use compressed row format */ 1027 m = l->B->rmap->n; 1028 for (i=0; i<m; i++) { 1029 n = ii[i+1] - ii[i]; 1030 aj = aij->j + ii[i]; 1031 aa = aij->a + ii[i]; 1032 for (j=0; j<n; j++) { 1033 if (PetscAbsScalar(mask[*aj])) { 1034 if (b) bb[i] -= *aa*xx[*aj]; 1035 *aa = 0.0; 1036 } 1037 aa++; 1038 aj++; 1039 } 1040 } 1041 } 1042 if (x && b) { 1043 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1044 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1045 } 1046 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1047 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1048 ierr = PetscFree(lrows);CHKERRQ(ierr); 1049 1050 /* only change matrix nonzero state if pattern was allowed to be changed */ 1051 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 1052 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1053 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1054 } 1055 PetscFunctionReturn(0); 1056 } 1057 1058 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1059 { 1060 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1061 PetscErrorCode ierr; 1062 PetscInt nt; 1063 VecScatter Mvctx = a->Mvctx; 1064 1065 PetscFunctionBegin; 1066 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1067 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx 
(%D)",A->cmap->n,nt); 1068 1069 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1070 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1071 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1072 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1073 PetscFunctionReturn(0); 1074 } 1075 1076 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1077 { 1078 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1079 PetscErrorCode ierr; 1080 1081 PetscFunctionBegin; 1082 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1083 PetscFunctionReturn(0); 1084 } 1085 1086 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1087 { 1088 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1089 PetscErrorCode ierr; 1090 VecScatter Mvctx = a->Mvctx; 1091 1092 PetscFunctionBegin; 1093 if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1; 1094 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1095 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1096 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1097 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1098 PetscFunctionReturn(0); 1099 } 1100 1101 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1102 { 1103 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1104 PetscErrorCode ierr; 1105 1106 PetscFunctionBegin; 1107 /* do nondiagonal part */ 1108 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1109 /* do local part */ 1110 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1111 /* add partial results together */ 1112 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1113 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1114 PetscFunctionReturn(0); 1115 } 1116 1117 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1118 { 1119 MPI_Comm 
comm; 1120 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1121 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1122 IS Me,Notme; 1123 PetscErrorCode ierr; 1124 PetscInt M,N,first,last,*notme,i; 1125 PetscBool lf; 1126 PetscMPIInt size; 1127 1128 PetscFunctionBegin; 1129 /* Easy test: symmetric diagonal block */ 1130 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1131 ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr); 1132 ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr); 1133 if (!*f) PetscFunctionReturn(0); 1134 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1135 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1136 if (size == 1) PetscFunctionReturn(0); 1137 1138 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1139 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1140 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1141 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1142 for (i=0; i<first; i++) notme[i] = i; 1143 for (i=last; i<M; i++) notme[i-last+first] = i; 1144 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1145 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1146 ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1147 Aoff = Aoffs[0]; 1148 ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1149 Boff = Boffs[0]; 1150 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1151 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1152 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1153 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1154 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1155 ierr = PetscFree(notme);CHKERRQ(ierr); 1156 PetscFunctionReturn(0); 1157 } 1158 1159 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1160 { 1161 PetscErrorCode ierr; 1162 1163 PetscFunctionBegin; 1164 ierr = 
MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr); 1165 PetscFunctionReturn(0); 1166 } 1167 1168 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1169 { 1170 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1171 PetscErrorCode ierr; 1172 1173 PetscFunctionBegin; 1174 /* do nondiagonal part */ 1175 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1176 /* do local part */ 1177 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1178 /* add partial results together */ 1179 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1180 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1181 PetscFunctionReturn(0); 1182 } 1183 1184 /* 1185 This only works correctly for square matrices where the subblock A->A is the 1186 diagonal block 1187 */ 1188 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1189 { 1190 PetscErrorCode ierr; 1191 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1192 1193 PetscFunctionBegin; 1194 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1195 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1196 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1197 PetscFunctionReturn(0); 1198 } 1199 1200 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1201 { 1202 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1203 PetscErrorCode ierr; 1204 1205 PetscFunctionBegin; 1206 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1207 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1208 PetscFunctionReturn(0); 1209 } 1210 1211 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1212 { 1213 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1214 PetscErrorCode ierr; 1215 1216 PetscFunctionBegin; 1217 #if defined(PETSC_USE_LOG) 1218 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1219 #endif 
1220 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1221 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1222 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1223 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1224 #if defined(PETSC_USE_CTABLE) 1225 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1226 #else 1227 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1228 #endif 1229 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1230 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1231 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1232 if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);} 1233 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1234 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1235 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1236 1237 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1238 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1239 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1240 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1241 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1242 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr); 1243 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1244 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1245 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1246 #if defined(PETSC_HAVE_ELEMENTAL) 1247 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1248 #endif 1249 #if defined(PETSC_HAVE_HYPRE) 1250 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1251 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1252 #endif 1253 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1254 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr); 1255 PetscFunctionReturn(0); 1256 } 1257 1258 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1259 { 1260 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1261 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1262 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1263 PetscErrorCode ierr; 1264 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1265 int fd; 1266 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1267 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1268 PetscScalar *column_values; 1269 PetscInt message_count,flowcontrolcount; 1270 FILE *file; 1271 1272 PetscFunctionBegin; 1273 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1274 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1275 nz = A->nz + B->nz; 1276 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1277 if (!rank) { 1278 header[0] = MAT_FILE_CLASSID; 1279 header[1] = mat->rmap->N; 1280 header[2] = mat->cmap->N; 1281 1282 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1283 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1284 /* get largest number of rows any processor has */ 1285 rlen = mat->rmap->n; 1286 range = mat->rmap->range; 1287 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1288 } else { 1289 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1290 rlen = mat->rmap->n; 1291 } 1292 1293 /* load up the local row counts */ 1294 ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr); 1295 for (i=0; 
i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1296 1297 /* store the row lengths to the file */ 1298 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1299 if (!rank) { 1300 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1301 for (i=1; i<size; i++) { 1302 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1303 rlen = range[i+1] - range[i]; 1304 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1305 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1306 } 1307 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1308 } else { 1309 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1310 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1311 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1312 } 1313 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1314 1315 /* load up the local column indices */ 1316 nzmax = nz; /* th processor needs space a largest processor needs */ 1317 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1318 ierr = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr); 1319 cnt = 0; 1320 for (i=0; i<mat->rmap->n; i++) { 1321 for (j=B->i[i]; j<B->i[i+1]; j++) { 1322 if ((col = garray[B->j[j]]) > cstart) break; 1323 column_indices[cnt++] = col; 1324 } 1325 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1326 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1327 } 1328 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1329 1330 /* store the column indices to the file */ 1331 ierr = 
PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1332 if (!rank) { 1333 MPI_Status status; 1334 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1335 for (i=1; i<size; i++) { 1336 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1337 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1338 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1339 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1340 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1341 } 1342 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1343 } else { 1344 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1345 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1346 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1347 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1348 } 1349 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1350 1351 /* load up the local column values */ 1352 ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr); 1353 cnt = 0; 1354 for (i=0; i<mat->rmap->n; i++) { 1355 for (j=B->i[i]; j<B->i[i+1]; j++) { 1356 if (garray[B->j[j]] > cstart) break; 1357 column_values[cnt++] = B->a[j]; 1358 } 1359 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1360 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1361 } 1362 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1363 1364 /* store the column values to the file */ 1365 ierr = 
PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1366 if (!rank) { 1367 MPI_Status status; 1368 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1369 for (i=1; i<size; i++) { 1370 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1371 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1372 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1373 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1374 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1375 } 1376 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1377 } else { 1378 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1379 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1380 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1381 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1382 } 1383 ierr = PetscFree(column_values);CHKERRQ(ierr); 1384 1385 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1386 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1387 PetscFunctionReturn(0); 1388 } 1389 1390 #include <petscdraw.h> 1391 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1392 { 1393 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1394 PetscErrorCode ierr; 1395 PetscMPIInt rank = aij->rank,size = aij->size; 1396 PetscBool isdraw,iascii,isbinary; 1397 PetscViewer sviewer; 1398 PetscViewerFormat format; 1399 1400 PetscFunctionBegin; 1401 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1402 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1403 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1404 if (iascii) { 1405 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1406 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1407 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1408 ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr); 1409 ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1410 for (i=0; i<(PetscInt)size; i++) { 1411 nmax = PetscMax(nmax,nz[i]); 1412 nmin = PetscMin(nmin,nz[i]); 1413 navg += nz[i]; 1414 } 1415 ierr = PetscFree(nz);CHKERRQ(ierr); 1416 navg = navg/size; 1417 ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr); 1418 PetscFunctionReturn(0); 1419 } 1420 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1421 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1422 MatInfo info; 1423 PetscBool inodes; 1424 1425 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1426 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1427 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1428 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1429 if (!inodes) { 1430 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n", 1431 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1432 } else { 1433 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n", 1434 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1435 } 1436 ierr = 
MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1437 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1438 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1439 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1440 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1441 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1442 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1443 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1444 PetscFunctionReturn(0); 1445 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1446 PetscInt inodecount,inodelimit,*inodes; 1447 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1448 if (inodes) { 1449 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1450 } else { 1451 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1452 } 1453 PetscFunctionReturn(0); 1454 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1455 PetscFunctionReturn(0); 1456 } 1457 } else if (isbinary) { 1458 if (size == 1) { 1459 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1460 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1461 } else { 1462 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1463 } 1464 PetscFunctionReturn(0); 1465 } else if (iascii && size == 1) { 1466 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1467 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1468 PetscFunctionReturn(0); 1469 } else if (isdraw) { 1470 PetscDraw draw; 1471 PetscBool isnull; 1472 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1473 ierr = 
PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1474 if (isnull) PetscFunctionReturn(0); 1475 } 1476 1477 { /* assemble the entire matrix onto first processor */ 1478 Mat A = NULL, Av; 1479 IS isrow,iscol; 1480 1481 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1482 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1483 ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 1484 ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr); 1485 /* The commented code uses MatCreateSubMatrices instead */ 1486 /* 1487 Mat *AA, A = NULL, Av; 1488 IS isrow,iscol; 1489 1490 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1491 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1492 ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr); 1493 if (!rank) { 1494 ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr); 1495 A = AA[0]; 1496 Av = AA[0]; 1497 } 1498 ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr); 1499 */ 1500 ierr = ISDestroy(&iscol);CHKERRQ(ierr); 1501 ierr = ISDestroy(&isrow);CHKERRQ(ierr); 1502 /* 1503 Everyone has to call to draw the matrix since the graphics waits are 1504 synchronized across all processors that share the PetscDraw object 1505 */ 1506 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1507 if (!rank) { 1508 if (((PetscObject)mat)->name) { 1509 ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr); 1510 } 1511 ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr); 1512 } 1513 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1514 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1515 ierr = MatDestroy(&A);CHKERRQ(ierr); 1516 } 1517 PetscFunctionReturn(0); 1518 } 1519 1520 
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1521 { 1522 PetscErrorCode ierr; 1523 PetscBool iascii,isdraw,issocket,isbinary; 1524 1525 PetscFunctionBegin; 1526 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1527 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1528 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1529 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1530 if (iascii || isdraw || isbinary || issocket) { 1531 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1532 } 1533 PetscFunctionReturn(0); 1534 } 1535 1536 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1537 { 1538 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1539 PetscErrorCode ierr; 1540 Vec bb1 = 0; 1541 PetscBool hasop; 1542 1543 PetscFunctionBegin; 1544 if (flag == SOR_APPLY_UPPER) { 1545 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1546 PetscFunctionReturn(0); 1547 } 1548 1549 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1550 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1551 } 1552 1553 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1554 if (flag & SOR_ZERO_INITIAL_GUESS) { 1555 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1556 its--; 1557 } 1558 1559 while (its--) { 1560 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1561 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1562 1563 /* update rhs: bb1 = bb - B*x */ 1564 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1565 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1566 1567 /* local sweep */ 1568 ierr = 
(*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1569 } 1570 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1571 if (flag & SOR_ZERO_INITIAL_GUESS) { 1572 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1573 its--; 1574 } 1575 while (its--) { 1576 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1577 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1578 1579 /* update rhs: bb1 = bb - B*x */ 1580 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1581 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1582 1583 /* local sweep */ 1584 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1585 } 1586 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1587 if (flag & SOR_ZERO_INITIAL_GUESS) { 1588 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1589 its--; 1590 } 1591 while (its--) { 1592 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1593 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1594 1595 /* update rhs: bb1 = bb - B*x */ 1596 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1597 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1598 1599 /* local sweep */ 1600 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1601 } 1602 } else if (flag & SOR_EISENSTAT) { 1603 Vec xx1; 1604 1605 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1606 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1607 1608 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1609 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1610 if 
(!mat->diag) { 1611 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1612 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1613 } 1614 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1615 if (hasop) { 1616 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1617 } else { 1618 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1619 } 1620 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1621 1622 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1623 1624 /* local sweep */ 1625 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1626 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1627 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1628 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1629 1630 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1631 1632 matin->factorerrortype = mat->A->factorerrortype; 1633 PetscFunctionReturn(0); 1634 } 1635 1636 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1637 { 1638 Mat aA,aB,Aperm; 1639 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1640 PetscScalar *aa,*ba; 1641 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1642 PetscSF rowsf,sf; 1643 IS parcolp = NULL; 1644 PetscBool done; 1645 PetscErrorCode ierr; 1646 1647 PetscFunctionBegin; 1648 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1649 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1650 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1651 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1652 1653 /* Invert row permutation to find out where my rows should go */ 1654 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1655 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1656 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1657 for (i=0; 
i<m; i++) work[i] = A->rmap->rstart + i; 1658 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1659 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1660 1661 /* Invert column permutation to find out where my columns should go */ 1662 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1663 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1664 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1665 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1666 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1667 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1668 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1669 1670 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1671 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1672 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1673 1674 /* Find out where my gcols should go */ 1675 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1676 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1677 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1678 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1679 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1680 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1681 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1682 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1683 1684 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1685 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1686 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1687 for (i=0; i<m; i++) { 1688 PetscInt row = rdest[i],rowner; 1689 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1690 for (j=ai[i]; j<ai[i+1]; j++) { 1691 PetscInt cowner,col = 
cdest[aj[j]]; 1692 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1693 if (rowner == cowner) dnnz[i]++; 1694 else onnz[i]++; 1695 } 1696 for (j=bi[i]; j<bi[i+1]; j++) { 1697 PetscInt cowner,col = gcdest[bj[j]]; 1698 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1699 if (rowner == cowner) dnnz[i]++; 1700 else onnz[i]++; 1701 } 1702 } 1703 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1704 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1705 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1706 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1707 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1708 1709 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1710 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1711 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1712 for (i=0; i<m; i++) { 1713 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1714 PetscInt j0,rowlen; 1715 rowlen = ai[i+1] - ai[i]; 1716 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1717 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1718 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1719 } 1720 rowlen = bi[i+1] - bi[i]; 1721 for (j0=j=0; j<rowlen; j0=j) { 1722 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1723 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1724 } 1725 } 1726 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1727 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1728 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1729 ierr = 
MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1730 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1731 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1732 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1733 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1734 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1735 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1736 *B = Aperm; 1737 PetscFunctionReturn(0); 1738 } 1739 1740 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1741 { 1742 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1743 PetscErrorCode ierr; 1744 1745 PetscFunctionBegin; 1746 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1747 if (ghosts) *ghosts = aij->garray; 1748 PetscFunctionReturn(0); 1749 } 1750 1751 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1752 { 1753 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1754 Mat A = mat->A,B = mat->B; 1755 PetscErrorCode ierr; 1756 PetscReal isend[5],irecv[5]; 1757 1758 PetscFunctionBegin; 1759 info->block_size = 1.0; 1760 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1761 1762 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1763 isend[3] = info->memory; isend[4] = info->mallocs; 1764 1765 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1766 1767 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1768 isend[3] += info->memory; isend[4] += info->mallocs; 1769 if (flag == MAT_LOCAL) { 1770 info->nz_used = isend[0]; 1771 info->nz_allocated = isend[1]; 1772 info->nz_unneeded = isend[2]; 1773 info->memory = isend[3]; 1774 info->mallocs = isend[4]; 1775 } else if (flag == MAT_GLOBAL_MAX) { 1776 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1777 1778 info->nz_used = irecv[0]; 1779 info->nz_allocated = irecv[1]; 1780 info->nz_unneeded = irecv[2]; 1781 
info->memory = irecv[3]; 1782 info->mallocs = irecv[4]; 1783 } else if (flag == MAT_GLOBAL_SUM) { 1784 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1785 1786 info->nz_used = irecv[0]; 1787 info->nz_allocated = irecv[1]; 1788 info->nz_unneeded = irecv[2]; 1789 info->memory = irecv[3]; 1790 info->mallocs = irecv[4]; 1791 } 1792 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1793 info->fill_ratio_needed = 0; 1794 info->factor_mallocs = 0; 1795 PetscFunctionReturn(0); 1796 } 1797 1798 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1799 { 1800 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1801 PetscErrorCode ierr; 1802 1803 PetscFunctionBegin; 1804 switch (op) { 1805 case MAT_NEW_NONZERO_LOCATIONS: 1806 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1807 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1808 case MAT_KEEP_NONZERO_PATTERN: 1809 case MAT_NEW_NONZERO_LOCATION_ERR: 1810 case MAT_USE_INODES: 1811 case MAT_IGNORE_ZERO_ENTRIES: 1812 MatCheckPreallocated(A,1); 1813 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1814 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1815 break; 1816 case MAT_ROW_ORIENTED: 1817 MatCheckPreallocated(A,1); 1818 a->roworiented = flg; 1819 1820 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1821 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1822 break; 1823 case MAT_NEW_DIAGONALS: 1824 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1825 break; 1826 case MAT_IGNORE_OFF_PROC_ENTRIES: 1827 a->donotstash = flg; 1828 break; 1829 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1830 case MAT_SPD: 1831 case MAT_SYMMETRIC: 1832 case MAT_STRUCTURALLY_SYMMETRIC: 1833 case MAT_HERMITIAN: 1834 case MAT_SYMMETRY_ETERNAL: 1835 break; 1836 case MAT_SUBMAT_SINGLEIS: 1837 A->submat_singleis = flg; 1838 break; 1839 case MAT_STRUCTURE_ONLY: 1840 /* The option is handled directly by MatSetOption() */ 1841 break; 
1842 default: 1843 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1844 } 1845 PetscFunctionReturn(0); 1846 } 1847 1848 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1849 { 1850 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1851 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1852 PetscErrorCode ierr; 1853 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1854 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1855 PetscInt *cmap,*idx_p; 1856 1857 PetscFunctionBegin; 1858 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1859 mat->getrowactive = PETSC_TRUE; 1860 1861 if (!mat->rowvalues && (idx || v)) { 1862 /* 1863 allocate enough space to hold information from the longest row. 1864 */ 1865 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1866 PetscInt max = 1,tmp; 1867 for (i=0; i<matin->rmap->n; i++) { 1868 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1869 if (max < tmp) max = tmp; 1870 } 1871 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1872 } 1873 1874 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1875 lrow = row - rstart; 1876 1877 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1878 if (!v) {pvA = 0; pvB = 0;} 1879 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1880 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1881 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1882 nztot = nzA + nzB; 1883 1884 cmap = mat->garray; 1885 if (v || idx) { 1886 if (nztot) { 1887 /* Sort by increasing column numbers, assuming A and B already sorted */ 1888 PetscInt imark = -1; 1889 if (v) { 1890 *v = v_p = mat->rowvalues; 1891 for (i=0; i<nzB; i++) { 1892 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1893 else break; 1894 } 1895 imark = i; 1896 for 
(i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1897 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1898 } 1899 if (idx) { 1900 *idx = idx_p = mat->rowindices; 1901 if (imark > -1) { 1902 for (i=0; i<imark; i++) { 1903 idx_p[i] = cmap[cworkB[i]]; 1904 } 1905 } else { 1906 for (i=0; i<nzB; i++) { 1907 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1908 else break; 1909 } 1910 imark = i; 1911 } 1912 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1913 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1914 } 1915 } else { 1916 if (idx) *idx = 0; 1917 if (v) *v = 0; 1918 } 1919 } 1920 *nz = nztot; 1921 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1922 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1923 PetscFunctionReturn(0); 1924 } 1925 1926 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1927 { 1928 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1929 1930 PetscFunctionBegin; 1931 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1932 aij->getrowactive = PETSC_FALSE; 1933 PetscFunctionReturn(0); 1934 } 1935 1936 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1937 { 1938 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1939 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1940 PetscErrorCode ierr; 1941 PetscInt i,j,cstart = mat->cmap->rstart; 1942 PetscReal sum = 0.0; 1943 MatScalar *v; 1944 1945 PetscFunctionBegin; 1946 if (aij->size == 1) { 1947 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1948 } else { 1949 if (type == NORM_FROBENIUS) { 1950 v = amat->a; 1951 for (i=0; i<amat->nz; i++) { 1952 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1953 } 1954 v = bmat->a; 1955 for (i=0; i<bmat->nz; i++) { 1956 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1957 } 1958 ierr = 
MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1959 *norm = PetscSqrtReal(*norm); 1960 ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr); 1961 } else if (type == NORM_1) { /* max column norm */ 1962 PetscReal *tmp,*tmp2; 1963 PetscInt *jj,*garray = aij->garray; 1964 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1965 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1966 *norm = 0.0; 1967 v = amat->a; jj = amat->j; 1968 for (j=0; j<amat->nz; j++) { 1969 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1970 } 1971 v = bmat->a; jj = bmat->j; 1972 for (j=0; j<bmat->nz; j++) { 1973 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1974 } 1975 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1976 for (j=0; j<mat->cmap->N; j++) { 1977 if (tmp2[j] > *norm) *norm = tmp2[j]; 1978 } 1979 ierr = PetscFree(tmp);CHKERRQ(ierr); 1980 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1981 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1982 } else if (type == NORM_INFINITY) { /* max row norm */ 1983 PetscReal ntemp = 0.0; 1984 for (j=0; j<aij->A->rmap->n; j++) { 1985 v = amat->a + amat->i[j]; 1986 sum = 0.0; 1987 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1988 sum += PetscAbsScalar(*v); v++; 1989 } 1990 v = bmat->a + bmat->i[j]; 1991 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1992 sum += PetscAbsScalar(*v); v++; 1993 } 1994 if (sum > ntemp) ntemp = sum; 1995 } 1996 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1997 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1998 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1999 } 2000 PetscFunctionReturn(0); 2001 } 2002 2003 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2004 { 2005 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 2006 Mat_SeqAIJ *Aloc 
=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 2007 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,*B_diag_ilen,*B_diag_i,i,ncol,A_diag_ncol; 2008 PetscErrorCode ierr; 2009 Mat B,A_diag,*B_diag; 2010 MatScalar *array; 2011 2012 PetscFunctionBegin; 2013 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2014 ai = Aloc->i; aj = Aloc->j; 2015 bi = Bloc->i; bj = Bloc->j; 2016 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2017 PetscInt *d_nnz,*g_nnz,*o_nnz; 2018 PetscSFNode *oloc; 2019 PETSC_UNUSED PetscSF sf; 2020 2021 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 2022 /* compute d_nnz for preallocation */ 2023 ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr); 2024 for (i=0; i<ai[ma]; i++) { 2025 d_nnz[aj[i]]++; 2026 } 2027 /* compute local off-diagonal contributions */ 2028 ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr); 2029 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2030 /* map those to global */ 2031 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2032 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2033 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2034 ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr); 2035 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2036 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2037 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2038 2039 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2040 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2041 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 2042 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2043 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2044 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2045 } else { 2046 B = *matout; 2047 ierr = 
MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2048 } 2049 2050 b = (Mat_MPIAIJ*)B->data; 2051 A_diag = a->A; 2052 B_diag = &b->A; 2053 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 2054 A_diag_ncol = A_diag->cmap->N; 2055 B_diag_ilen = sub_B_diag->ilen; 2056 B_diag_i = sub_B_diag->i; 2057 2058 /* Set ilen for diagonal of B */ 2059 for (i=0; i<A_diag_ncol; i++) { 2060 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 2061 } 2062 2063 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 2064 very quickly (=without using MatSetValues), because all writes are local. */ 2065 ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr); 2066 2067 /* copy over the B part */ 2068 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 2069 array = Bloc->a; 2070 row = A->rmap->rstart; 2071 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2072 cols_tmp = cols; 2073 for (i=0; i<mb; i++) { 2074 ncol = bi[i+1]-bi[i]; 2075 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2076 row++; 2077 array += ncol; cols_tmp += ncol; 2078 } 2079 ierr = PetscFree(cols);CHKERRQ(ierr); 2080 2081 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2082 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2083 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 2084 *matout = B; 2085 } else { 2086 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2087 } 2088 PetscFunctionReturn(0); 2089 } 2090 2091 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2092 { 2093 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2094 Mat a = aij->A,b = aij->B; 2095 PetscErrorCode ierr; 2096 PetscInt s1,s2,s3; 2097 2098 PetscFunctionBegin; 2099 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2100 if (rr) { 2101 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2102 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2103 /* Overlap communication with 
computation. */ 2104 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2105 } 2106 if (ll) { 2107 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2108 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2109 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2110 } 2111 /* scale the diagonal block */ 2112 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2113 2114 if (rr) { 2115 /* Do a scatter end and then right scale the off-diagonal block */ 2116 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2117 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2118 } 2119 PetscFunctionReturn(0); 2120 } 2121 2122 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2123 { 2124 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2125 PetscErrorCode ierr; 2126 2127 PetscFunctionBegin; 2128 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2129 PetscFunctionReturn(0); 2130 } 2131 2132 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2133 { 2134 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2135 Mat a,b,c,d; 2136 PetscBool flg; 2137 PetscErrorCode ierr; 2138 2139 PetscFunctionBegin; 2140 a = matA->A; b = matA->B; 2141 c = matB->A; d = matB->B; 2142 2143 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2144 if (flg) { 2145 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2146 } 2147 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2148 PetscFunctionReturn(0); 2149 } 2150 2151 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2152 { 2153 PetscErrorCode ierr; 2154 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2155 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2156 2157 PetscFunctionBegin; 2158 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 2159 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2160 /* because of the column compression in the off-processor part of the matrix a->B, 2161 the number of columns in a->B and b->B may be different, hence we cannot call 2162 the MatCopy() directly on the two parts. If need be, we can provide a more 2163 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2164 then copying the submatrices */ 2165 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2166 } else { 2167 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2168 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2169 } 2170 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2171 PetscFunctionReturn(0); 2172 } 2173 2174 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2175 { 2176 PetscErrorCode ierr; 2177 2178 PetscFunctionBegin; 2179 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2180 PetscFunctionReturn(0); 2181 } 2182 2183 /* 2184 Computes the number of nonzeros per row needed for preallocation when X and Y 2185 have different nonzero structure. 
2186 */ 2187 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2188 { 2189 PetscInt i,j,k,nzx,nzy; 2190 2191 PetscFunctionBegin; 2192 /* Set the number of nonzeros in the new matrix */ 2193 for (i=0; i<m; i++) { 2194 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2195 nzx = xi[i+1] - xi[i]; 2196 nzy = yi[i+1] - yi[i]; 2197 nnz[i] = 0; 2198 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2199 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2200 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2201 nnz[i]++; 2202 } 2203 for (; k<nzy; k++) nnz[i]++; 2204 } 2205 PetscFunctionReturn(0); 2206 } 2207 2208 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2209 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2210 { 2211 PetscErrorCode ierr; 2212 PetscInt m = Y->rmap->N; 2213 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2214 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2215 2216 PetscFunctionBegin; 2217 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2218 PetscFunctionReturn(0); 2219 } 2220 2221 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2222 { 2223 PetscErrorCode ierr; 2224 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2225 PetscBLASInt bnz,one=1; 2226 Mat_SeqAIJ *x,*y; 2227 2228 PetscFunctionBegin; 2229 if (str == SAME_NONZERO_PATTERN) { 2230 PetscScalar alpha = a; 2231 x = (Mat_SeqAIJ*)xx->A->data; 2232 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2233 y = (Mat_SeqAIJ*)yy->A->data; 2234 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2235 x = (Mat_SeqAIJ*)xx->B->data; 2236 y = (Mat_SeqAIJ*)yy->B->data; 2237 ierr = 
PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2238 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2239 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2240 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2241 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2242 } else { 2243 Mat B; 2244 PetscInt *nnz_d,*nnz_o; 2245 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2246 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2247 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2248 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2249 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2250 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2251 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2252 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2253 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2254 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2255 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2256 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2257 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2258 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2259 } 2260 PetscFunctionReturn(0); 2261 } 2262 2263 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2264 2265 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2266 { 2267 #if defined(PETSC_USE_COMPLEX) 2268 PetscErrorCode ierr; 2269 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2270 2271 PetscFunctionBegin; 2272 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2273 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2274 #else 2275 PetscFunctionBegin; 2276 #endif 2277 PetscFunctionReturn(0); 2278 } 2279 2280 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2281 { 2282 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2283 PetscErrorCode ierr; 2284 2285 PetscFunctionBegin; 2286 ierr = 
MatRealPart(a->A);CHKERRQ(ierr); 2287 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2288 PetscFunctionReturn(0); 2289 } 2290 2291 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2292 { 2293 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2294 PetscErrorCode ierr; 2295 2296 PetscFunctionBegin; 2297 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2298 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2299 PetscFunctionReturn(0); 2300 } 2301 2302 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2303 { 2304 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2305 PetscErrorCode ierr; 2306 PetscInt i,*idxb = 0; 2307 PetscScalar *va,*vb; 2308 Vec vtmp; 2309 2310 PetscFunctionBegin; 2311 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2312 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2313 if (idx) { 2314 for (i=0; i<A->rmap->n; i++) { 2315 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2316 } 2317 } 2318 2319 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2320 if (idx) { 2321 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2322 } 2323 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2324 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2325 2326 for (i=0; i<A->rmap->n; i++) { 2327 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2328 va[i] = vb[i]; 2329 if (idx) idx[i] = a->garray[idxb[i]]; 2330 } 2331 } 2332 2333 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2334 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2335 ierr = PetscFree(idxb);CHKERRQ(ierr); 2336 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2337 PetscFunctionReturn(0); 2338 } 2339 2340 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2341 { 2342 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2343 PetscErrorCode ierr; 2344 PetscInt i,*idxb = 0; 2345 PetscScalar *va,*vb; 2346 Vec vtmp; 2347 2348 PetscFunctionBegin; 2349 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2350 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2351 if (idx) { 2352 for (i=0; i<A->cmap->n; i++) { 2353 if 
(PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2354 } 2355 } 2356 2357 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2358 if (idx) { 2359 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2360 } 2361 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2362 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2363 2364 for (i=0; i<A->rmap->n; i++) { 2365 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2366 va[i] = vb[i]; 2367 if (idx) idx[i] = a->garray[idxb[i]]; 2368 } 2369 } 2370 2371 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2372 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2373 ierr = PetscFree(idxb);CHKERRQ(ierr); 2374 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2375 PetscFunctionReturn(0); 2376 } 2377 2378 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2379 { 2380 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2381 PetscInt n = A->rmap->n; 2382 PetscInt cstart = A->cmap->rstart; 2383 PetscInt *cmap = mat->garray; 2384 PetscInt *diagIdx, *offdiagIdx; 2385 Vec diagV, offdiagV; 2386 PetscScalar *a, *diagA, *offdiagA; 2387 PetscInt r; 2388 PetscErrorCode ierr; 2389 2390 PetscFunctionBegin; 2391 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2392 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2393 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 2394 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2395 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2396 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2397 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2398 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2399 for (r = 0; r < n; ++r) { 2400 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2401 a[r] = diagA[r]; 2402 idx[r] = cstart + diagIdx[r]; 2403 } else { 2404 a[r] = offdiagA[r]; 2405 idx[r] = cmap[offdiagIdx[r]]; 2406 } 2407 } 2408 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2409 ierr = 
VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2410 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2411 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2412 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2413 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2414 PetscFunctionReturn(0); 2415 } 2416 2417 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2418 { 2419 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2420 PetscInt n = A->rmap->n; 2421 PetscInt cstart = A->cmap->rstart; 2422 PetscInt *cmap = mat->garray; 2423 PetscInt *diagIdx, *offdiagIdx; 2424 Vec diagV, offdiagV; 2425 PetscScalar *a, *diagA, *offdiagA; 2426 PetscInt r; 2427 PetscErrorCode ierr; 2428 2429 PetscFunctionBegin; 2430 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2431 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2432 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2433 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2434 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2435 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2436 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2437 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2438 for (r = 0; r < n; ++r) { 2439 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2440 a[r] = diagA[r]; 2441 idx[r] = cstart + diagIdx[r]; 2442 } else { 2443 a[r] = offdiagA[r]; 2444 idx[r] = cmap[offdiagIdx[r]]; 2445 } 2446 } 2447 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2448 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2449 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2450 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2451 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2452 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2453 PetscFunctionReturn(0); 2454 } 2455 2456 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2457 { 2458 PetscErrorCode ierr; 2459 Mat *dummy; 2460 2461 PetscFunctionBegin; 2462 ierr = 
MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2463 *newmat = *dummy; 2464 ierr = PetscFree(dummy);CHKERRQ(ierr); 2465 PetscFunctionReturn(0); 2466 } 2467 2468 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2469 { 2470 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2471 PetscErrorCode ierr; 2472 2473 PetscFunctionBegin; 2474 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2475 A->factorerrortype = a->A->factorerrortype; 2476 PetscFunctionReturn(0); 2477 } 2478 2479 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2480 { 2481 PetscErrorCode ierr; 2482 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2483 2484 PetscFunctionBegin; 2485 if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2486 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2487 if (x->assembled) { 2488 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2489 } else { 2490 ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr); 2491 } 2492 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2493 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2494 PetscFunctionReturn(0); 2495 } 2496 2497 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2498 { 2499 PetscFunctionBegin; 2500 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2501 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2502 PetscFunctionReturn(0); 2503 } 2504 2505 /*@ 2506 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2507 2508 Collective on Mat 2509 2510 Input Parameters: 2511 + A - the matrix 2512 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2513 2514 Level: advanced 2515 
2516 @*/ 2517 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2518 { 2519 PetscErrorCode ierr; 2520 2521 PetscFunctionBegin; 2522 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2523 PetscFunctionReturn(0); 2524 } 2525 2526 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2527 { 2528 PetscErrorCode ierr; 2529 PetscBool sc = PETSC_FALSE,flg; 2530 2531 PetscFunctionBegin; 2532 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2533 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2534 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2535 if (flg) { 2536 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2537 } 2538 ierr = PetscOptionsTail();CHKERRQ(ierr); 2539 PetscFunctionReturn(0); 2540 } 2541 2542 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2543 { 2544 PetscErrorCode ierr; 2545 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2546 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2547 2548 PetscFunctionBegin; 2549 if (!Y->preallocated) { 2550 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2551 } else if (!aij->nz) { 2552 PetscInt nonew = aij->nonew; 2553 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2554 aij->nonew = nonew; 2555 } 2556 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2557 PetscFunctionReturn(0); 2558 } 2559 2560 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2561 { 2562 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2563 PetscErrorCode ierr; 2564 2565 PetscFunctionBegin; 2566 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2567 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2568 if (d) { 2569 PetscInt rstart; 2570 ierr = 
MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2571 *d += rstart; 2572 2573 } 2574 PetscFunctionReturn(0); 2575 } 2576 2577 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag) 2578 { 2579 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2580 PetscErrorCode ierr; 2581 2582 PetscFunctionBegin; 2583 ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr); 2584 PetscFunctionReturn(0); 2585 } 2586 2587 /* -------------------------------------------------------------------*/ 2588 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2589 MatGetRow_MPIAIJ, 2590 MatRestoreRow_MPIAIJ, 2591 MatMult_MPIAIJ, 2592 /* 4*/ MatMultAdd_MPIAIJ, 2593 MatMultTranspose_MPIAIJ, 2594 MatMultTransposeAdd_MPIAIJ, 2595 0, 2596 0, 2597 0, 2598 /*10*/ 0, 2599 0, 2600 0, 2601 MatSOR_MPIAIJ, 2602 MatTranspose_MPIAIJ, 2603 /*15*/ MatGetInfo_MPIAIJ, 2604 MatEqual_MPIAIJ, 2605 MatGetDiagonal_MPIAIJ, 2606 MatDiagonalScale_MPIAIJ, 2607 MatNorm_MPIAIJ, 2608 /*20*/ MatAssemblyBegin_MPIAIJ, 2609 MatAssemblyEnd_MPIAIJ, 2610 MatSetOption_MPIAIJ, 2611 MatZeroEntries_MPIAIJ, 2612 /*24*/ MatZeroRows_MPIAIJ, 2613 0, 2614 0, 2615 0, 2616 0, 2617 /*29*/ MatSetUp_MPIAIJ, 2618 0, 2619 0, 2620 MatGetDiagonalBlock_MPIAIJ, 2621 0, 2622 /*34*/ MatDuplicate_MPIAIJ, 2623 0, 2624 0, 2625 0, 2626 0, 2627 /*39*/ MatAXPY_MPIAIJ, 2628 MatCreateSubMatrices_MPIAIJ, 2629 MatIncreaseOverlap_MPIAIJ, 2630 MatGetValues_MPIAIJ, 2631 MatCopy_MPIAIJ, 2632 /*44*/ MatGetRowMax_MPIAIJ, 2633 MatScale_MPIAIJ, 2634 MatShift_MPIAIJ, 2635 MatDiagonalSet_MPIAIJ, 2636 MatZeroRowsColumns_MPIAIJ, 2637 /*49*/ MatSetRandom_MPIAIJ, 2638 0, 2639 0, 2640 0, 2641 0, 2642 /*54*/ MatFDColoringCreate_MPIXAIJ, 2643 0, 2644 MatSetUnfactored_MPIAIJ, 2645 MatPermute_MPIAIJ, 2646 0, 2647 /*59*/ MatCreateSubMatrix_MPIAIJ, 2648 MatDestroy_MPIAIJ, 2649 MatView_MPIAIJ, 2650 0, 2651 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2652 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 
2653 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2654 0, 2655 0, 2656 0, 2657 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2658 MatGetRowMinAbs_MPIAIJ, 2659 0, 2660 0, 2661 0, 2662 0, 2663 /*75*/ MatFDColoringApply_AIJ, 2664 MatSetFromOptions_MPIAIJ, 2665 0, 2666 0, 2667 MatFindZeroDiagonals_MPIAIJ, 2668 /*80*/ 0, 2669 0, 2670 0, 2671 /*83*/ MatLoad_MPIAIJ, 2672 MatIsSymmetric_MPIAIJ, 2673 0, 2674 0, 2675 0, 2676 0, 2677 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2678 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2679 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2680 MatPtAP_MPIAIJ_MPIAIJ, 2681 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2682 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2683 0, 2684 0, 2685 0, 2686 0, 2687 /*99*/ 0, 2688 0, 2689 0, 2690 MatConjugate_MPIAIJ, 2691 0, 2692 /*104*/MatSetValuesRow_MPIAIJ, 2693 MatRealPart_MPIAIJ, 2694 MatImaginaryPart_MPIAIJ, 2695 0, 2696 0, 2697 /*109*/0, 2698 0, 2699 MatGetRowMin_MPIAIJ, 2700 0, 2701 MatMissingDiagonal_MPIAIJ, 2702 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2703 0, 2704 MatGetGhosts_MPIAIJ, 2705 0, 2706 0, 2707 /*119*/0, 2708 0, 2709 0, 2710 0, 2711 MatGetMultiProcBlock_MPIAIJ, 2712 /*124*/MatFindNonzeroRows_MPIAIJ, 2713 MatGetColumnNorms_MPIAIJ, 2714 MatInvertBlockDiagonal_MPIAIJ, 2715 MatInvertVariableBlockDiagonal_MPIAIJ, 2716 MatCreateSubMatricesMPI_MPIAIJ, 2717 /*129*/0, 2718 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2719 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2720 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2721 0, 2722 /*134*/0, 2723 0, 2724 MatRARt_MPIAIJ_MPIAIJ, 2725 0, 2726 0, 2727 /*139*/MatSetBlockSizes_MPIAIJ, 2728 0, 2729 0, 2730 MatFDColoringSetUp_MPIXAIJ, 2731 MatFindOffBlockDiagonalEntries_MPIAIJ, 2732 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ 2733 }; 2734 2735 /* ----------------------------------------------------------------------------------------*/ 2736 2737 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2738 { 2739 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2740 PetscErrorCode ierr; 2741 2742 PetscFunctionBegin; 2743 ierr = 
MatStoreValues(aij->A);CHKERRQ(ierr); 2744 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2745 PetscFunctionReturn(0); 2746 } 2747 2748 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2749 { 2750 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2751 PetscErrorCode ierr; 2752 2753 PetscFunctionBegin; 2754 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2755 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2756 PetscFunctionReturn(0); 2757 } 2758 2759 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2760 { 2761 Mat_MPIAIJ *b; 2762 PetscErrorCode ierr; 2763 PetscMPIInt size; 2764 2765 PetscFunctionBegin; 2766 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2767 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2768 b = (Mat_MPIAIJ*)B->data; 2769 2770 #if defined(PETSC_USE_CTABLE) 2771 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2772 #else 2773 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2774 #endif 2775 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2776 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2777 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2778 2779 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2780 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 2781 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2782 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2783 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? 
B->cmap->N : 0);CHKERRQ(ierr); 2784 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2785 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2786 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2787 2788 if (!B->preallocated) { 2789 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2790 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2791 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2792 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2793 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2794 } 2795 2796 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2797 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2798 B->preallocated = PETSC_TRUE; 2799 B->was_assembled = PETSC_FALSE; 2800 B->assembled = PETSC_FALSE; 2801 PetscFunctionReturn(0); 2802 } 2803 2804 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2805 { 2806 Mat_MPIAIJ *b; 2807 PetscErrorCode ierr; 2808 2809 PetscFunctionBegin; 2810 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2811 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2812 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2813 b = (Mat_MPIAIJ*)B->data; 2814 2815 #if defined(PETSC_USE_CTABLE) 2816 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2817 #else 2818 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2819 #endif 2820 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2821 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2822 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2823 2824 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2825 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2826 B->preallocated = PETSC_TRUE; 2827 B->was_assembled = PETSC_FALSE; 2828 B->assembled = PETSC_FALSE; 2829 PetscFunctionReturn(0); 2830 } 2831 2832 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2833 { 2834 Mat mat; 2835 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2836 PetscErrorCode 
ierr; 2837 2838 PetscFunctionBegin; 2839 *newmat = 0; 2840 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2841 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2842 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2843 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2844 a = (Mat_MPIAIJ*)mat->data; 2845 2846 mat->factortype = matin->factortype; 2847 mat->assembled = PETSC_TRUE; 2848 mat->insertmode = NOT_SET_VALUES; 2849 mat->preallocated = PETSC_TRUE; 2850 2851 a->size = oldmat->size; 2852 a->rank = oldmat->rank; 2853 a->donotstash = oldmat->donotstash; 2854 a->roworiented = oldmat->roworiented; 2855 a->rowindices = 0; 2856 a->rowvalues = 0; 2857 a->getrowactive = PETSC_FALSE; 2858 2859 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2860 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2861 2862 if (oldmat->colmap) { 2863 #if defined(PETSC_USE_CTABLE) 2864 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2865 #else 2866 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2867 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2868 ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr); 2869 #endif 2870 } else a->colmap = 0; 2871 if (oldmat->garray) { 2872 PetscInt len; 2873 len = oldmat->B->cmap->n; 2874 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2875 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2876 if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); } 2877 } else a->garray = 0; 2878 2879 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2880 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2881 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2882 ierr = 
PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2883 2884 if (oldmat->Mvctx_mpi1) { 2885 ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr); 2886 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr); 2887 } 2888 2889 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2890 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2891 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2892 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2893 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2894 *newmat = mat; 2895 PetscFunctionReturn(0); 2896 } 2897 2898 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2899 { 2900 PetscBool isbinary, ishdf5; 2901 PetscErrorCode ierr; 2902 2903 PetscFunctionBegin; 2904 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 2905 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 2906 /* force binary viewer to load .info file if it has not yet done so */ 2907 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2908 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 2909 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr); 2910 if (isbinary) { 2911 ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr); 2912 } else if (ishdf5) { 2913 #if defined(PETSC_HAVE_HDF5) 2914 ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr); 2915 #else 2916 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 2917 #endif 2918 } else { 2919 SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 2920 } 2921 PetscFunctionReturn(0); 2922 } 2923 2924 
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat newMat, PetscViewer viewer) 2925 { 2926 PetscScalar *vals,*svals; 2927 MPI_Comm comm; 2928 PetscErrorCode ierr; 2929 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2930 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2931 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2932 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2933 PetscInt cend,cstart,n,*rowners; 2934 int fd; 2935 PetscInt bs = newMat->rmap->bs; 2936 2937 PetscFunctionBegin; 2938 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2939 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2940 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2941 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2942 if (!rank) { 2943 ierr = PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);CHKERRQ(ierr); 2944 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2945 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ"); 2946 } 2947 2948 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr); 2949 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2950 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2951 if (bs < 0) bs = 1; 2952 2953 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2954 M = header[1]; N = header[2]; 2955 2956 /* If global sizes are set, check if they are consistent with that given in the file */ 2957 if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M); 2958 if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in 
file has (%D) and input matrix has (%D)",newMat->cmap->N,N); 2959 2960 /* determine ownership of all (block) rows */ 2961 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 2962 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 2963 else m = newMat->rmap->n; /* Set by user */ 2964 2965 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 2966 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2967 2968 /* First process needs enough room for process with most rows */ 2969 if (!rank) { 2970 mmax = rowners[1]; 2971 for (i=2; i<=size; i++) { 2972 mmax = PetscMax(mmax, rowners[i]); 2973 } 2974 } else mmax = -1; /* unused, but compilers complain */ 2975 2976 rowners[0] = 0; 2977 for (i=2; i<=size; i++) { 2978 rowners[i] += rowners[i-1]; 2979 } 2980 rstart = rowners[rank]; 2981 rend = rowners[rank+1]; 2982 2983 /* distribute row lengths to all processors */ 2984 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 2985 if (!rank) { 2986 ierr = PetscBinaryRead(fd,ourlens,m,NULL,PETSC_INT);CHKERRQ(ierr); 2987 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 2988 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 2989 for (j=0; j<m; j++) { 2990 procsnz[0] += ourlens[j]; 2991 } 2992 for (i=1; i<size; i++) { 2993 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],NULL,PETSC_INT);CHKERRQ(ierr); 2994 /* calculate the number of nonzeros on each processor */ 2995 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2996 procsnz[i] += rowlengths[j]; 2997 } 2998 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2999 } 3000 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3001 } else { 3002 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3003 } 3004 3005 if (!rank) { 3006 /* determine max buffer needed and allocate it */ 3007 maxnz = 0; 3008 for (i=0; i<size; i++) { 3009 maxnz = 
PetscMax(maxnz,procsnz[i]); 3010 } 3011 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 3012 3013 /* read in my part of the matrix column indices */ 3014 nz = procsnz[0]; 3015 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3016 ierr = PetscBinaryRead(fd,mycols,nz,NULL,PETSC_INT);CHKERRQ(ierr); 3017 3018 /* read in every one elses and ship off */ 3019 for (i=1; i<size; i++) { 3020 nz = procsnz[i]; 3021 ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr); 3022 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3023 } 3024 ierr = PetscFree(cols);CHKERRQ(ierr); 3025 } else { 3026 /* determine buffer space needed for message */ 3027 nz = 0; 3028 for (i=0; i<m; i++) { 3029 nz += ourlens[i]; 3030 } 3031 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3032 3033 /* receive message of column indices*/ 3034 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3035 } 3036 3037 /* determine column ownership if matrix is not square */ 3038 if (N != M) { 3039 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3040 else n = newMat->cmap->n; 3041 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3042 cstart = cend - n; 3043 } else { 3044 cstart = rstart; 3045 cend = rend; 3046 n = cend - cstart; 3047 } 3048 3049 /* loop over local rows, determining number of off diagonal entries */ 3050 ierr = PetscArrayzero(offlens,m);CHKERRQ(ierr); 3051 jj = 0; 3052 for (i=0; i<m; i++) { 3053 for (j=0; j<ourlens[i]; j++) { 3054 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3055 jj++; 3056 } 3057 } 3058 3059 for (i=0; i<m; i++) { 3060 ourlens[i] -= offlens[i]; 3061 } 3062 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3063 3064 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3065 3066 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3067 3068 for (i=0; i<m; i++) { 3069 ourlens[i] += offlens[i]; 3070 } 3071 3072 if (!rank) { 3073 ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr); 
3074 3075 /* read in my part of the matrix numerical values */ 3076 nz = procsnz[0]; 3077 ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr); 3078 3079 /* insert into matrix */ 3080 jj = rstart; 3081 smycols = mycols; 3082 svals = vals; 3083 for (i=0; i<m; i++) { 3084 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3085 smycols += ourlens[i]; 3086 svals += ourlens[i]; 3087 jj++; 3088 } 3089 3090 /* read in other processors and ship out */ 3091 for (i=1; i<size; i++) { 3092 nz = procsnz[i]; 3093 ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr); 3094 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3095 } 3096 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3097 } else { 3098 /* receive numeric values */ 3099 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 3100 3101 /* receive message of values*/ 3102 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3103 3104 /* insert into matrix */ 3105 jj = rstart; 3106 smycols = mycols; 3107 svals = vals; 3108 for (i=0; i<m; i++) { 3109 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3110 smycols += ourlens[i]; 3111 svals += ourlens[i]; 3112 jj++; 3113 } 3114 } 3115 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3116 ierr = PetscFree(vals);CHKERRQ(ierr); 3117 ierr = PetscFree(mycols);CHKERRQ(ierr); 3118 ierr = PetscFree(rowners);CHKERRQ(ierr); 3119 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3120 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3121 PetscFunctionReturn(0); 3122 } 3123 3124 /* Not scalable because of ISAllGather() unless getting all columns. 
*/ 3125 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3126 { 3127 PetscErrorCode ierr; 3128 IS iscol_local; 3129 PetscBool isstride; 3130 PetscMPIInt lisstride=0,gisstride; 3131 3132 PetscFunctionBegin; 3133 /* check if we are grabbing all columns*/ 3134 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3135 3136 if (isstride) { 3137 PetscInt start,len,mstart,mlen; 3138 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3139 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3140 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3141 if (mstart == start && mlen-mstart == len) lisstride = 1; 3142 } 3143 3144 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3145 if (gisstride) { 3146 PetscInt N; 3147 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3148 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr); 3149 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3150 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3151 } else { 3152 PetscInt cbs; 3153 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3154 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3155 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3156 } 3157 3158 *isseq = iscol_local; 3159 PetscFunctionReturn(0); 3160 } 3161 3162 /* 3163 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3164 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3165 3166 Input Parameters: 3167 mat - matrix 3168 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3169 i.e., mat->rstart <= isrow[i] < mat->rend 3170 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3171 i.e., mat->cstart <= iscol[i] < mat->cend 3172 Output Parameter: 3173 isrow_d,iscol_d - sequential 
row and column index sets for retrieving mat->A 3174 iscol_o - sequential column index set for retrieving mat->B 3175 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3176 */ 3177 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3178 { 3179 PetscErrorCode ierr; 3180 Vec x,cmap; 3181 const PetscInt *is_idx; 3182 PetscScalar *xarray,*cmaparray; 3183 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3184 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3185 Mat B=a->B; 3186 Vec lvec=a->lvec,lcmap; 3187 PetscInt i,cstart,cend,Bn=B->cmap->N; 3188 MPI_Comm comm; 3189 VecScatter Mvctx=a->Mvctx; 3190 3191 PetscFunctionBegin; 3192 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3193 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3194 3195 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */ 3196 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3197 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3198 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3199 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3200 3201 /* Get start indices */ 3202 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3203 isstart -= ncols; 3204 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3205 3206 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3207 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3208 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3209 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3210 for (i=0; i<ncols; i++) { 3211 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3212 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3213 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3214 } 3215 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3216 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3217 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3218 3219 /* Get 
iscol_d */ 3220 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3221 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3222 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3223 3224 /* Get isrow_d */ 3225 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3226 rstart = mat->rmap->rstart; 3227 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3228 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3229 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3230 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3231 3232 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3233 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3234 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3235 3236 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3237 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3238 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3239 3240 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3241 3242 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3243 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3244 3245 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3246 /* off-process column indices */ 3247 count = 0; 3248 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3249 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3250 3251 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3252 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3253 for (i=0; i<Bn; i++) { 3254 if (PetscRealPart(xarray[i]) > -1.0) { 3255 idx[count] = i; /* local column index in off-diagonal part B */ 3256 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3257 count++; 3258 } 3259 } 3260 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3261 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3262 
3263 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3264 /* cannot ensure iscol_o has same blocksize as iscol! */ 3265 3266 ierr = PetscFree(idx);CHKERRQ(ierr); 3267 *garray = cmap1; 3268 3269 ierr = VecDestroy(&x);CHKERRQ(ierr); 3270 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3271 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3272 PetscFunctionReturn(0); 3273 } 3274 3275 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3276 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3277 { 3278 PetscErrorCode ierr; 3279 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3280 Mat M = NULL; 3281 MPI_Comm comm; 3282 IS iscol_d,isrow_d,iscol_o; 3283 Mat Asub = NULL,Bsub = NULL; 3284 PetscInt n; 3285 3286 PetscFunctionBegin; 3287 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3288 3289 if (call == MAT_REUSE_MATRIX) { 3290 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3291 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3292 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3293 3294 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3295 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3296 3297 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3298 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3299 3300 /* Update diagonal and off-diagonal portions of submat */ 3301 asub = (Mat_MPIAIJ*)(*submat)->data; 3302 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3303 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 
3304 if (n) { 3305 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3306 } 3307 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3308 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3309 3310 } else { /* call == MAT_INITIAL_MATRIX) */ 3311 const PetscInt *garray; 3312 PetscInt BsubN; 3313 3314 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */ 3315 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3316 3317 /* Create local submatrices Asub and Bsub */ 3318 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3319 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3320 3321 /* Create submatrix M */ 3322 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3323 3324 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3325 asub = (Mat_MPIAIJ*)M->data; 3326 3327 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3328 n = asub->B->cmap->N; 3329 if (BsubN > n) { 3330 /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */ 3331 const PetscInt *idx; 3332 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3333 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3334 3335 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3336 j = 0; 3337 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3338 for (i=0; i<n; i++) { 3339 if (j >= BsubN) break; 3340 while (subgarray[i] > garray[j]) j++; 3341 3342 if (subgarray[i] == garray[j]) { 3343 idx_new[i] = idx[j++]; 3344 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3345 } 3346 ierr = 
ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3347 3348 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3349 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3350 3351 } else if (BsubN < n) { 3352 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3353 } 3354 3355 ierr = PetscFree(garray);CHKERRQ(ierr); 3356 *submat = M; 3357 3358 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3359 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3360 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3361 3362 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3363 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3364 3365 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3366 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3367 } 3368 PetscFunctionReturn(0); 3369 } 3370 3371 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3372 { 3373 PetscErrorCode ierr; 3374 IS iscol_local=NULL,isrow_d; 3375 PetscInt csize; 3376 PetscInt n,i,j,start,end; 3377 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3378 MPI_Comm comm; 3379 3380 PetscFunctionBegin; 3381 /* If isrow has same processor distribution as mat, 3382 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3383 if (call == MAT_REUSE_MATRIX) { 3384 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3385 if (isrow_d) { 3386 sameRowDist = PETSC_TRUE; 3387 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3388 } else { 3389 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3390 if (iscol_local) { 3391 sameRowDist = PETSC_TRUE; 3392 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3393 } 3394 } 3395 } else { 3396 /* Check if 
isrow has same processor distribution as mat */ 3397 sameDist[0] = PETSC_FALSE; 3398 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3399 if (!n) { 3400 sameDist[0] = PETSC_TRUE; 3401 } else { 3402 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3403 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3404 if (i >= start && j < end) { 3405 sameDist[0] = PETSC_TRUE; 3406 } 3407 } 3408 3409 /* Check if iscol has same processor distribution as mat */ 3410 sameDist[1] = PETSC_FALSE; 3411 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3412 if (!n) { 3413 sameDist[1] = PETSC_TRUE; 3414 } else { 3415 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3416 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3417 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3418 } 3419 3420 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3421 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3422 sameRowDist = tsameDist[0]; 3423 } 3424 3425 if (sameRowDist) { 3426 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3427 /* isrow and iscol have same processor distribution as mat */ 3428 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3429 PetscFunctionReturn(0); 3430 } else { /* sameRowDist */ 3431 /* isrow has same processor distribution as mat */ 3432 if (call == MAT_INITIAL_MATRIX) { 3433 PetscBool sorted; 3434 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3435 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3436 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3437 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3438 3439 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3440 if (sorted) { 3441 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3442 ierr = 
MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3443 PetscFunctionReturn(0); 3444 } 3445 } else { /* call == MAT_REUSE_MATRIX */ 3446 IS iscol_sub; 3447 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3448 if (iscol_sub) { 3449 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3450 PetscFunctionReturn(0); 3451 } 3452 } 3453 } 3454 } 3455 3456 /* General case: iscol -> iscol_local which has global size of iscol */ 3457 if (call == MAT_REUSE_MATRIX) { 3458 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3459 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3460 } else { 3461 if (!iscol_local) { 3462 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3463 } 3464 } 3465 3466 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3467 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3468 3469 if (call == MAT_INITIAL_MATRIX) { 3470 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3471 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3472 } 3473 PetscFunctionReturn(0); 3474 } 3475 3476 /*@C 3477 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3478 and "off-diagonal" part of the matrix in CSR format. 3479 3480 Collective 3481 3482 Input Parameters: 3483 + comm - MPI communicator 3484 . A - "diagonal" portion of matrix 3485 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3486 - garray - global index of B columns 3487 3488 Output Parameter: 3489 . 
mat - the matrix, with input A as its local diagonal matrix 3490 Level: advanced 3491 3492 Notes: 3493 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3494 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3495 3496 .seealso: MatCreateMPIAIJWithSplitArrays() 3497 @*/ 3498 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3499 { 3500 PetscErrorCode ierr; 3501 Mat_MPIAIJ *maij; 3502 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3503 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3504 PetscScalar *oa=b->a; 3505 Mat Bnew; 3506 PetscInt m,n,N; 3507 3508 PetscFunctionBegin; 3509 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3510 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3511 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3512 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3513 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3514 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3515 3516 /* Get global columns of mat */ 3517 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3518 3519 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3520 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3521 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3522 maij = (Mat_MPIAIJ*)(*mat)->data; 3523 3524 (*mat)->preallocated = PETSC_TRUE; 3525 3526 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3527 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3528 3529 /* Set A as diagonal portion of *mat */ 3530 maij->A = A; 3531 3532 nz = oi[m]; 3533 for (i=0; i<nz; i++) { 3534 col = oj[i]; 3535 oj[i] 
= garray[col]; 3536 } 3537 3538 /* Set Bnew as off-diagonal portion of *mat */ 3539 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3540 bnew = (Mat_SeqAIJ*)Bnew->data; 3541 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3542 maij->B = Bnew; 3543 3544 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3545 3546 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3547 b->free_a = PETSC_FALSE; 3548 b->free_ij = PETSC_FALSE; 3549 ierr = MatDestroy(&B);CHKERRQ(ierr); 3550 3551 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3552 bnew->free_a = PETSC_TRUE; 3553 bnew->free_ij = PETSC_TRUE; 3554 3555 /* condense columns of maij->B */ 3556 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3557 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3558 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3559 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3560 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3561 PetscFunctionReturn(0); 3562 } 3563 3564 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3565 3566 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3567 { 3568 PetscErrorCode ierr; 3569 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3570 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3571 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3572 Mat M,Msub,B=a->B; 3573 MatScalar *aa; 3574 Mat_SeqAIJ *aij; 3575 PetscInt *garray = a->garray,*colsub,Ncols; 3576 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3577 IS iscol_sub,iscmap; 3578 const PetscInt *is_idx,*cmap; 3579 PetscBool allcolumns=PETSC_FALSE; 3580 MPI_Comm comm; 3581 3582 
PetscFunctionBegin; 3583 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3584 3585 if (call == MAT_REUSE_MATRIX) { 3586 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3587 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3588 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3589 3590 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3591 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3592 3593 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3594 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3595 3596 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3597 3598 } else { /* call == MAT_INITIAL_MATRIX) */ 3599 PetscBool flg; 3600 3601 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3602 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3603 3604 /* (1) iscol -> nonscalable iscol_local */ 3605 /* Check for special case: each processor gets entire matrix columns */ 3606 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3607 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3608 if (allcolumns) { 3609 iscol_sub = iscol_local; 3610 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3611 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3612 3613 } else { 3614 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3615 PetscInt *idx,*cmap1,k; 3616 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3617 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3618 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3619 count = 0; 3620 k = 0; 3621 for (i=0; i<Ncols; i++) { 3622 j = is_idx[i]; 3623 if (j >= cstart && j < cend) { 3624 /* diagonal part of mat */ 3625 idx[count] = j; 3626 cmap1[count++] = i; /* column index in submat */ 3627 } else if (Bn) { 3628 /* off-diagonal part of mat */ 3629 if (j == garray[k]) { 3630 idx[count] = j; 3631 cmap1[count++] = i; /* column index in submat */ 3632 } else if (j > garray[k]) { 3633 while (j > garray[k] && k < Bn-1) k++; 3634 if (j == garray[k]) { 3635 idx[count] = j; 3636 cmap1[count++] = i; /* column index in submat */ 3637 } 3638 } 3639 } 3640 } 3641 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3642 3643 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3644 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3645 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3646 3647 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3648 } 3649 3650 /* (3) Create sequential Msub */ 3651 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3652 } 3653 3654 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3655 aij = (Mat_SeqAIJ*)(Msub)->data; 3656 ii = aij->i; 3657 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3658 3659 /* 3660 m - number of local rows 3661 Ncols - number of columns (same on all processors) 3662 rstart - first row in new global matrix generated 3663 */ 3664 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3665 3666 if (call == MAT_INITIAL_MATRIX) { 3667 /* (4) Create parallel newmat */ 3668 PetscMPIInt rank,size; 3669 PetscInt csize; 3670 3671 
ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3672 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3673 3674 /* 3675 Determine the number of non-zeros in the diagonal and off-diagonal 3676 portions of the matrix in order to do correct preallocation 3677 */ 3678 3679 /* first get start and end of "diagonal" columns */ 3680 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3681 if (csize == PETSC_DECIDE) { 3682 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3683 if (mglobal == Ncols) { /* square matrix */ 3684 nlocal = m; 3685 } else { 3686 nlocal = Ncols/size + ((Ncols % size) > rank); 3687 } 3688 } else { 3689 nlocal = csize; 3690 } 3691 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3692 rstart = rend - nlocal; 3693 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3694 3695 /* next, compute all the lengths */ 3696 jj = aij->j; 3697 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3698 olens = dlens + m; 3699 for (i=0; i<m; i++) { 3700 jend = ii[i+1] - ii[i]; 3701 olen = 0; 3702 dlen = 0; 3703 for (j=0; j<jend; j++) { 3704 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3705 else dlen++; 3706 jj++; 3707 } 3708 olens[i] = olen; 3709 dlens[i] = dlen; 3710 } 3711 3712 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3713 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3714 3715 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3716 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 3717 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3718 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3719 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3720 ierr = PetscFree(dlens);CHKERRQ(ierr); 3721 3722 } else { /* call == MAT_REUSE_MATRIX */ 3723 M = *newmat; 3724 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3725 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be 
same size/layout as request"); 3726 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3727 /* 3728 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3729 rather than the slower MatSetValues(). 3730 */ 3731 M->was_assembled = PETSC_TRUE; 3732 M->assembled = PETSC_FALSE; 3733 } 3734 3735 /* (5) Set values of Msub to *newmat */ 3736 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3737 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3738 3739 jj = aij->j; 3740 aa = aij->a; 3741 for (i=0; i<m; i++) { 3742 row = rstart + i; 3743 nz = ii[i+1] - ii[i]; 3744 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3745 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3746 jj += nz; aa += nz; 3747 } 3748 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3749 3750 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3751 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3752 3753 ierr = PetscFree(colsub);CHKERRQ(ierr); 3754 3755 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3756 if (call == MAT_INITIAL_MATRIX) { 3757 *newmat = M; 3758 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3759 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3760 3761 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3762 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3763 3764 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3765 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3766 3767 if (iscol_local) { 3768 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3769 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3770 } 3771 } 3772 PetscFunctionReturn(0); 3773 } 3774 3775 /* 3776 Not great since it makes two copies of the submatrix, first an SeqAIJ 3777 in local and then by concatenating the local matrices the end result. 
3778 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3779 3780 Note: This requires a sequential iscol with all indices. 3781 */ 3782 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3783 { 3784 PetscErrorCode ierr; 3785 PetscMPIInt rank,size; 3786 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3787 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3788 Mat M,Mreuse; 3789 MatScalar *aa,*vwork; 3790 MPI_Comm comm; 3791 Mat_SeqAIJ *aij; 3792 PetscBool colflag,allcolumns=PETSC_FALSE; 3793 3794 PetscFunctionBegin; 3795 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3796 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3797 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3798 3799 /* Check for special case: each processor gets entire matrix columns */ 3800 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3801 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3802 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3803 3804 if (call == MAT_REUSE_MATRIX) { 3805 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3806 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3807 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3808 } else { 3809 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3810 } 3811 3812 /* 3813 m - number of local rows 3814 n - number of columns (same on all processors) 3815 rstart - first row in new global matrix generated 3816 */ 3817 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3818 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3819 if (call == MAT_INITIAL_MATRIX) { 3820 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3821 ii = aij->i; 3822 jj = aij->j; 3823 3824 /* 3825 
Determine the number of non-zeros in the diagonal and off-diagonal 3826 portions of the matrix in order to do correct preallocation 3827 */ 3828 3829 /* first get start and end of "diagonal" columns */ 3830 if (csize == PETSC_DECIDE) { 3831 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3832 if (mglobal == n) { /* square matrix */ 3833 nlocal = m; 3834 } else { 3835 nlocal = n/size + ((n % size) > rank); 3836 } 3837 } else { 3838 nlocal = csize; 3839 } 3840 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3841 rstart = rend - nlocal; 3842 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3843 3844 /* next, compute all the lengths */ 3845 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3846 olens = dlens + m; 3847 for (i=0; i<m; i++) { 3848 jend = ii[i+1] - ii[i]; 3849 olen = 0; 3850 dlen = 0; 3851 for (j=0; j<jend; j++) { 3852 if (*jj < rstart || *jj >= rend) olen++; 3853 else dlen++; 3854 jj++; 3855 } 3856 olens[i] = olen; 3857 dlens[i] = dlen; 3858 } 3859 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3860 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3861 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3862 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3863 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3864 ierr = PetscFree(dlens);CHKERRQ(ierr); 3865 } else { 3866 PetscInt ml,nl; 3867 3868 M = *newmat; 3869 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3870 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3871 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3872 /* 3873 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3874 rather than the slower MatSetValues(). 
3875 */ 3876 M->was_assembled = PETSC_TRUE; 3877 M->assembled = PETSC_FALSE; 3878 } 3879 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3880 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3881 ii = aij->i; 3882 jj = aij->j; 3883 aa = aij->a; 3884 for (i=0; i<m; i++) { 3885 row = rstart + i; 3886 nz = ii[i+1] - ii[i]; 3887 cwork = jj; jj += nz; 3888 vwork = aa; aa += nz; 3889 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3890 } 3891 3892 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3893 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3894 *newmat = M; 3895 3896 /* save submatrix used in processor for next request */ 3897 if (call == MAT_INITIAL_MATRIX) { 3898 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3899 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3900 } 3901 PetscFunctionReturn(0); 3902 } 3903 3904 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3905 { 3906 PetscInt m,cstart, cend,j,nnz,i,d; 3907 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3908 const PetscInt *JJ; 3909 PetscScalar *values; 3910 PetscErrorCode ierr; 3911 PetscBool nooffprocentries; 3912 3913 PetscFunctionBegin; 3914 if (Ii && Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3915 3916 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3917 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3918 m = B->rmap->n; 3919 cstart = B->cmap->rstart; 3920 cend = B->cmap->rend; 3921 rstart = B->rmap->rstart; 3922 3923 ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3924 3925 #if defined(PETSC_USE_DEBUG) 3926 for (i=0; i<m && Ii; i++) { 3927 nnz = Ii[i+1]- Ii[i]; 3928 JJ = J + Ii[i]; 3929 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3930 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row 
%D starts with negative column index",i,JJ[0]); 3931 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3932 } 3933 #endif 3934 3935 for (i=0; i<m && Ii; i++) { 3936 nnz = Ii[i+1]- Ii[i]; 3937 JJ = J + Ii[i]; 3938 nnz_max = PetscMax(nnz_max,nnz); 3939 d = 0; 3940 for (j=0; j<nnz; j++) { 3941 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3942 } 3943 d_nnz[i] = d; 3944 o_nnz[i] = nnz - d; 3945 } 3946 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3947 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3948 3949 if (v) values = (PetscScalar*)v; 3950 else { 3951 ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr); 3952 } 3953 3954 for (i=0; i<m && Ii; i++) { 3955 ii = i + rstart; 3956 nnz = Ii[i+1]- Ii[i]; 3957 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3958 } 3959 nooffprocentries = B->nooffprocentries; 3960 B->nooffprocentries = PETSC_TRUE; 3961 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3962 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3963 B->nooffprocentries = nooffprocentries; 3964 3965 if (!v) { 3966 ierr = PetscFree(values);CHKERRQ(ierr); 3967 } 3968 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3969 PetscFunctionReturn(0); 3970 } 3971 3972 /*@ 3973 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3974 (the default parallel PETSc format). 3975 3976 Collective 3977 3978 Input Parameters: 3979 + B - the matrix 3980 . i - the indices into j for the start of each local row (starts with zero) 3981 . 
j - the column indices for each local row (starts with zero) 3982 - v - optional values in the matrix 3983 3984 Level: developer 3985 3986 Notes: 3987 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3988 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3989 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3990 3991 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3992 3993 The format which is used for the sparse matrix input, is equivalent to a 3994 row-major ordering.. i.e for the following matrix, the input data expected is 3995 as shown 3996 3997 $ 1 0 0 3998 $ 2 0 3 P0 3999 $ ------- 4000 $ 4 5 6 P1 4001 $ 4002 $ Process0 [P0]: rows_owned=[0,1] 4003 $ i = {0,1,3} [size = nrow+1 = 2+1] 4004 $ j = {0,0,2} [size = 3] 4005 $ v = {1,2,3} [size = 3] 4006 $ 4007 $ Process1 [P1]: rows_owned=[2] 4008 $ i = {0,3} [size = nrow+1 = 1+1] 4009 $ j = {0,1,2} [size = 3] 4010 $ v = {4,5,6} [size = 3] 4011 4012 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 4013 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 4014 @*/ 4015 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 4016 { 4017 PetscErrorCode ierr; 4018 4019 PetscFunctionBegin; 4020 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 4021 PetscFunctionReturn(0); 4022 } 4023 4024 /*@C 4025 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 4026 (the default parallel PETSc format). For good matrix assembly performance 4027 the user should preallocate the matrix storage by setting the parameters 4028 d_nz (or d_nnz) and o_nz (or o_nnz). 
By setting these parameters accurately, 4029 performance can be increased by more than a factor of 50. 4030 4031 Collective 4032 4033 Input Parameters: 4034 + B - the matrix 4035 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4036 (same value is used for all local rows) 4037 . d_nnz - array containing the number of nonzeros in the various rows of the 4038 DIAGONAL portion of the local submatrix (possibly different for each row) 4039 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 4040 The size of this array is equal to the number of local rows, i.e 'm'. 4041 For matrices that will be factored, you must leave room for (and set) 4042 the diagonal entry even if it is zero. 4043 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4044 submatrix (same value is used for all local rows). 4045 - o_nnz - array containing the number of nonzeros in the various rows of the 4046 OFF-DIAGONAL portion of the local submatrix (possibly different for 4047 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 4048 structure. The size of this array is equal to the number 4049 of local rows, i.e 'm'. 4050 4051 If the *_nnz parameter is given then the *_nz parameter is ignored 4052 4053 The AIJ format (also called the Yale sparse matrix format or 4054 compressed row storage (CSR)), is fully compatible with standard Fortran 77 4055 storage. The stored row and column indices begin with zero. 4056 See Users-Manual: ch_mat for details. 4057 4058 The parallel matrix is partitioned such that the first m0 rows belong to 4059 process 0, the next m1 rows belong to process 1, the next m2 rows belong 4060 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence pre-allocation is perfect.
   Level: intermediate

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* B must be a valid Mat whose type has already been set */
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  /* Forward to the implementation composed on B under "MatMPIAIJSetPreallocation_C".
     NOTE(review): presumably a no-op when B's type does not provide that method - confirm against PetscTryMethod() */
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
     MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
         CSR format the local rows.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.   j - column indices
-   a - matrix values

   Output Parameter:
.   mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4191 4192 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4193 4194 The format which is used for the sparse matrix input, is equivalent to a 4195 row-major ordering.. i.e for the following matrix, the input data expected is 4196 as shown 4197 4198 $ 1 0 0 4199 $ 2 0 3 P0 4200 $ ------- 4201 $ 4 5 6 P1 4202 $ 4203 $ Process0 [P0]: rows_owned=[0,1] 4204 $ i = {0,1,3} [size = nrow+1 = 2+1] 4205 $ j = {0,0,2} [size = 3] 4206 $ v = {1,2,3} [size = 3] 4207 $ 4208 $ Process1 [P1]: rows_owned=[2] 4209 $ i = {0,3} [size = nrow+1 = 1+1] 4210 $ j = {0,1,2} [size = 3] 4211 $ v = {4,5,6} [size = 3] 4212 4213 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4214 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4215 @*/ 4216 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4217 { 4218 PetscErrorCode ierr; 4219 4220 PetscFunctionBegin; 4221 if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4222 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4223 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4224 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4225 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4226 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4227 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4228 PetscFunctionReturn(0); 4229 } 4230 4231 /*@C 4232 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4233 (the default parallel PETSc format). For good matrix assembly performance 4234 the user should preallocate the matrix storage by setting the parameters 4235 d_nz (or d_nnz) and o_nz (or o_nnz). 
   By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e. 'm'.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e. 'm'.

   Output Parameter:
.  A - the matrix

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2 etc., where
   m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
   values corresponding to an [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to 0th partition, the next n1 columns belonging to the next
   partition etc., where n0,n1,n2,... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   corresponding to the given processor, i.e. the diagonal matrix on
   process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1]
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion. The example below better
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism
.vb
     MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
.ve

$     MatCreate(...,&A);
$     MatSetType(A,MATMPIAIJ);
$     MatSetSizes(A, m,n,M,N);
$     MatMPIAIJSetPreallocation(A,...);

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode  - Do not use inodes
-  -mat_inode_limit <limit> - Sets inode limit (max limit=5)

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence pre-allocation is perfect.
4400 4401 Level: intermediate 4402 4403 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4404 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4405 @*/ 4406 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4407 { 4408 PetscErrorCode ierr; 4409 PetscMPIInt size; 4410 4411 PetscFunctionBegin; 4412 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4413 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4414 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4415 if (size > 1) { 4416 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4417 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4418 } else { 4419 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4420 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4421 } 4422 PetscFunctionReturn(0); 4423 } 4424 4425 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4426 { 4427 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4428 PetscBool flg; 4429 PetscErrorCode ierr; 4430 4431 PetscFunctionBegin; 4432 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr); 4433 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4434 if (Ad) *Ad = a->A; 4435 if (Ao) *Ao = a->B; 4436 if (colmap) *colmap = a->garray; 4437 PetscFunctionReturn(0); 4438 } 4439 4440 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4441 { 4442 PetscErrorCode ierr; 4443 PetscInt m,N,i,rstart,nnz,Ii; 4444 PetscInt *indx; 4445 PetscScalar *values; 4446 4447 PetscFunctionBegin; 4448 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4449 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4450 PetscInt *dnz,*onz,sum,bs,cbs; 4451 4452 if (n == PETSC_DECIDE) { 4453 ierr = 
PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4454 } 4455 /* Check sum(n) = N */ 4456 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4457 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4458 4459 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4460 rstart -= m; 4461 4462 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4463 for (i=0; i<m; i++) { 4464 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4465 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4466 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4467 } 4468 4469 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4470 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4471 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4472 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4473 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4474 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4475 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4476 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4477 } 4478 4479 /* numeric phase */ 4480 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4481 for (i=0; i<m; i++) { 4482 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4483 Ii = i + rstart; 4484 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4485 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4486 } 4487 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4488 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4489 PetscFunctionReturn(0); 4490 } 4491 4492 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4493 { 4494 PetscErrorCode ierr; 4495 PetscMPIInt rank; 4496 PetscInt m,N,i,rstart,nnz; 4497 size_t len; 4498 const PetscInt *indx; 4499 PetscViewer out; 
4500 char *name; 4501 Mat B; 4502 const PetscScalar *values; 4503 4504 PetscFunctionBegin; 4505 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4506 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4507 /* Should this be the type of the diagonal block of A? */ 4508 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4509 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4510 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4511 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4512 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4513 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4514 for (i=0; i<m; i++) { 4515 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4516 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4517 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4518 } 4519 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4520 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4521 4522 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4523 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4524 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 4525 sprintf(name,"%s.%d",outfile,rank); 4526 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4527 ierr = PetscFree(name);CHKERRQ(ierr); 4528 ierr = MatView(B,out);CHKERRQ(ierr); 4529 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4530 ierr = MatDestroy(&B);CHKERRQ(ierr); 4531 PetscFunctionReturn(0); 4532 } 4533 4534 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4535 { 4536 PetscErrorCode ierr; 4537 Mat_Merge_SeqsToMPI *merge; 4538 PetscContainer container; 4539 4540 PetscFunctionBegin; 4541 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4542 if (container) { 4543 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4544 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4545 ierr = 
PetscFree(merge->len_s);CHKERRQ(ierr); 4546 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4547 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4548 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4549 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4550 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4551 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4552 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4553 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4554 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4555 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4556 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4557 ierr = PetscFree(merge);CHKERRQ(ierr); 4558 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4559 } 4560 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4561 PetscFunctionReturn(0); 4562 } 4563 4564 #include <../src/mat/utils/freespace.h> 4565 #include <petscbt.h> 4566 4567 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4568 { 4569 PetscErrorCode ierr; 4570 MPI_Comm comm; 4571 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4572 PetscMPIInt size,rank,taga,*len_s; 4573 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4574 PetscInt proc,m; 4575 PetscInt **buf_ri,**buf_rj; 4576 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4577 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4578 MPI_Request *s_waits,*r_waits; 4579 MPI_Status *status; 4580 MatScalar *aa=a->a; 4581 MatScalar **abuf_r,*ba_i; 4582 Mat_Merge_SeqsToMPI *merge; 4583 PetscContainer container; 4584 4585 PetscFunctionBegin; 4586 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4587 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4588 4589 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4590 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4591 4592 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4593 ierr = 
PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4594 4595 bi = merge->bi; 4596 bj = merge->bj; 4597 buf_ri = merge->buf_ri; 4598 buf_rj = merge->buf_rj; 4599 4600 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4601 owners = merge->rowmap->range; 4602 len_s = merge->len_s; 4603 4604 /* send and recv matrix values */ 4605 /*-----------------------------*/ 4606 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4607 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4608 4609 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4610 for (proc=0,k=0; proc<size; proc++) { 4611 if (!len_s[proc]) continue; 4612 i = owners[proc]; 4613 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4614 k++; 4615 } 4616 4617 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4618 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4619 ierr = PetscFree(status);CHKERRQ(ierr); 4620 4621 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4622 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4623 4624 /* insert mat values of mpimat */ 4625 /*----------------------------*/ 4626 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4627 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4628 4629 for (k=0; k<merge->nrecv; k++) { 4630 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4631 nrows = *(buf_ri_k[k]); 4632 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4633 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4634 } 4635 4636 /* set values of ba */ 4637 m = merge->rowmap->n; 4638 for (i=0; i<m; i++) { 4639 arow = owners[rank] + i; 4640 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4641 bnzi = bi[i+1] - bi[i]; 4642 ierr = 
PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr); 4643 4644 /* add local non-zero vals of this proc's seqmat into ba */ 4645 anzi = ai[arow+1] - ai[arow]; 4646 aj = a->j + ai[arow]; 4647 aa = a->a + ai[arow]; 4648 nextaj = 0; 4649 for (j=0; nextaj<anzi; j++) { 4650 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4651 ba_i[j] += aa[nextaj++]; 4652 } 4653 } 4654 4655 /* add received vals into ba */ 4656 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4657 /* i-th row */ 4658 if (i == *nextrow[k]) { 4659 anzi = *(nextai[k]+1) - *nextai[k]; 4660 aj = buf_rj[k] + *(nextai[k]); 4661 aa = abuf_r[k] + *(nextai[k]); 4662 nextaj = 0; 4663 for (j=0; nextaj<anzi; j++) { 4664 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4665 ba_i[j] += aa[nextaj++]; 4666 } 4667 } 4668 nextrow[k]++; nextai[k]++; 4669 } 4670 } 4671 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4672 } 4673 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4674 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4675 4676 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4677 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4678 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4679 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4680 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4681 PetscFunctionReturn(0); 4682 } 4683 4684 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4685 { 4686 PetscErrorCode ierr; 4687 Mat B_mpi; 4688 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4689 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4690 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4691 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4692 PetscInt len,proc,*dnz,*onz,bs,cbs; 4693 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4694 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4695 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4696 
MPI_Status *status; 4697 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4698 PetscBT lnkbt; 4699 Mat_Merge_SeqsToMPI *merge; 4700 PetscContainer container; 4701 4702 PetscFunctionBegin; 4703 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4704 4705 /* make sure it is a PETSc comm */ 4706 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4707 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4708 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4709 4710 ierr = PetscNew(&merge);CHKERRQ(ierr); 4711 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4712 4713 /* determine row ownership */ 4714 /*---------------------------------------------------------*/ 4715 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4716 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4717 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4718 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4719 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4720 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4721 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4722 4723 m = merge->rowmap->n; 4724 owners = merge->rowmap->range; 4725 4726 /* determine the number of messages to send, their lengths */ 4727 /*---------------------------------------------------------*/ 4728 len_s = merge->len_s; 4729 4730 len = 0; /* length of buf_si[] */ 4731 merge->nsend = 0; 4732 for (proc=0; proc<size; proc++) { 4733 len_si[proc] = 0; 4734 if (proc == rank) { 4735 len_s[proc] = 0; 4736 } else { 4737 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4738 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4739 } 4740 if (len_s[proc]) { 4741 merge->nsend++; 4742 nrows = 0; 4743 for (i=owners[proc]; i<owners[proc+1]; i++) { 4744 if (ai[i+1] > ai[i]) nrows++; 4745 } 4746 len_si[proc] = 2*(nrows+1); 4747 len += len_si[proc]; 4748 } 4749 } 4750 4751 /* determine the number and length of messages 
to receive for ij-structure */ 4752 /*-------------------------------------------------------------------------*/ 4753 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4754 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4755 4756 /* post the Irecv of j-structure */ 4757 /*-------------------------------*/ 4758 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4759 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4760 4761 /* post the Isend of j-structure */ 4762 /*--------------------------------*/ 4763 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4764 4765 for (proc=0, k=0; proc<size; proc++) { 4766 if (!len_s[proc]) continue; 4767 i = owners[proc]; 4768 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4769 k++; 4770 } 4771 4772 /* receives and sends of j-structure are complete */ 4773 /*------------------------------------------------*/ 4774 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4775 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4776 4777 /* send and recv i-structure */ 4778 /*---------------------------*/ 4779 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4780 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4781 4782 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4783 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4784 for (proc=0,k=0; proc<size; proc++) { 4785 if (!len_s[proc]) continue; 4786 /* form outgoing message for i-structure: 4787 buf_si[0]: nrows to be sent 4788 [1:nrows]: row index (global) 4789 [nrows+1:2*nrows+1]: i-structure index 4790 */ 4791 /*-------------------------------------------*/ 4792 nrows = len_si[proc]/2 - 1; 4793 buf_si_i 
= buf_si + nrows+1; 4794 buf_si[0] = nrows; 4795 buf_si_i[0] = 0; 4796 nrows = 0; 4797 for (i=owners[proc]; i<owners[proc+1]; i++) { 4798 anzi = ai[i+1] - ai[i]; 4799 if (anzi) { 4800 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4801 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4802 nrows++; 4803 } 4804 } 4805 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4806 k++; 4807 buf_si += len_si[proc]; 4808 } 4809 4810 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4811 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4812 4813 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4814 for (i=0; i<merge->nrecv; i++) { 4815 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4816 } 4817 4818 ierr = PetscFree(len_si);CHKERRQ(ierr); 4819 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4820 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4821 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4822 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4823 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4824 ierr = PetscFree(status);CHKERRQ(ierr); 4825 4826 /* compute a local seq matrix in each processor */ 4827 /*----------------------------------------------*/ 4828 /* allocate bi array and free space for accumulating nonzero column info */ 4829 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4830 bi[0] = 0; 4831 4832 /* create and initialize a linked list */ 4833 nlnk = N+1; 4834 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4835 4836 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4837 len = ai[owners[rank+1]] - ai[owners[rank]]; 4838 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4839 4840 current_space = free_space; 4841 4842 /* determine symbolic info for each local row */ 4843 ierr = 
PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4844 4845 for (k=0; k<merge->nrecv; k++) { 4846 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4847 nrows = *buf_ri_k[k]; 4848 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4849 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4850 } 4851 4852 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4853 len = 0; 4854 for (i=0; i<m; i++) { 4855 bnzi = 0; 4856 /* add local non-zero cols of this proc's seqmat into lnk */ 4857 arow = owners[rank] + i; 4858 anzi = ai[arow+1] - ai[arow]; 4859 aj = a->j + ai[arow]; 4860 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4861 bnzi += nlnk; 4862 /* add received col data into lnk */ 4863 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4864 if (i == *nextrow[k]) { /* i-th row */ 4865 anzi = *(nextai[k]+1) - *nextai[k]; 4866 aj = buf_rj[k] + *nextai[k]; 4867 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4868 bnzi += nlnk; 4869 nextrow[k]++; nextai[k]++; 4870 } 4871 } 4872 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4873 4874 /* if free space is not available, make more free space */ 4875 if (current_space->local_remaining<bnzi) { 4876 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 4877 nspacedouble++; 4878 } 4879 /* copy data into free space, then initialize lnk */ 4880 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4881 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4882 4883 current_space->array += bnzi; 4884 current_space->local_used += bnzi; 4885 current_space->local_remaining -= bnzi; 4886 4887 bi[i+1] = bi[i] + bnzi; 4888 } 4889 4890 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4891 4892 ierr = 
PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4893 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4894 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4895 4896 /* create symbolic parallel matrix B_mpi */ 4897 /*---------------------------------------*/ 4898 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4899 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4900 if (n==PETSC_DECIDE) { 4901 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4902 } else { 4903 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4904 } 4905 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4906 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4907 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4908 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4909 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4910 4911 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4912 B_mpi->assembled = PETSC_FALSE; 4913 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4914 merge->bi = bi; 4915 merge->bj = bj; 4916 merge->buf_ri = buf_ri; 4917 merge->buf_rj = buf_rj; 4918 merge->coi = NULL; 4919 merge->coj = NULL; 4920 merge->owners_co = NULL; 4921 4922 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4923 4924 /* attach the supporting struct to B_mpi for reuse */ 4925 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4926 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4927 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4928 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4929 *mpimat = B_mpi; 4930 4931 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4932 PetscFunctionReturn(0); 4933 } 4934 4935 /*@C 4936 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4937 matrices from each processor 4938 4939 
Collective 4940 4941 Input Parameters: 4942 + comm - the communicators the parallel matrix will live on 4943 . seqmat - the input sequential matrices 4944 . m - number of local rows (or PETSC_DECIDE) 4945 . n - number of local columns (or PETSC_DECIDE) 4946 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4947 4948 Output Parameter: 4949 . mpimat - the parallel matrix generated 4950 4951 Level: advanced 4952 4953 Notes: 4954 The dimensions of the sequential matrix in each processor MUST be the same. 4955 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4956 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4957 @*/ 4958 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4959 { 4960 PetscErrorCode ierr; 4961 PetscMPIInt size; 4962 4963 PetscFunctionBegin; 4964 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4965 if (size == 1) { 4966 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4967 if (scall == MAT_INITIAL_MATRIX) { 4968 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4969 } else { 4970 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4971 } 4972 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4973 PetscFunctionReturn(0); 4974 } 4975 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4976 if (scall == MAT_INITIAL_MATRIX) { 4977 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4978 } 4979 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4980 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4981 PetscFunctionReturn(0); 4982 } 4983 4984 /*@ 4985 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4986 mlocal rows and n columns. 
Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4987 with MatGetSize() 4988 4989 Not Collective 4990 4991 Input Parameters: 4992 + A - the matrix 4993 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4994 4995 Output Parameter: 4996 . A_loc - the local sequential matrix generated 4997 4998 Level: developer 4999 5000 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed() 5001 5002 @*/ 5003 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5004 { 5005 PetscErrorCode ierr; 5006 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5007 Mat_SeqAIJ *mat,*a,*b; 5008 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 5009 MatScalar *aa,*ba,*cam; 5010 PetscScalar *ca; 5011 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5012 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5013 PetscBool match; 5014 MPI_Comm comm; 5015 PetscMPIInt size; 5016 5017 PetscFunctionBegin; 5018 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr); 5019 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5020 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5021 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5022 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 5023 5024 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5025 a = (Mat_SeqAIJ*)(mpimat->A)->data; 5026 b = (Mat_SeqAIJ*)(mpimat->B)->data; 5027 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 5028 aa = a->a; ba = b->a; 5029 if (scall == MAT_INITIAL_MATRIX) { 5030 if (size == 1) { 5031 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 5032 PetscFunctionReturn(0); 5033 } 5034 5035 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 5036 ci[0] = 0; 5037 for (i=0; i<am; i++) { 5038 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5039 } 5040 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 5041 
ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 5042 k = 0; 5043 for (i=0; i<am; i++) { 5044 ncols_o = bi[i+1] - bi[i]; 5045 ncols_d = ai[i+1] - ai[i]; 5046 /* off-diagonal portion of A */ 5047 for (jo=0; jo<ncols_o; jo++) { 5048 col = cmap[*bj]; 5049 if (col >= cstart) break; 5050 cj[k] = col; bj++; 5051 ca[k++] = *ba++; 5052 } 5053 /* diagonal portion of A */ 5054 for (j=0; j<ncols_d; j++) { 5055 cj[k] = cstart + *aj++; 5056 ca[k++] = *aa++; 5057 } 5058 /* off-diagonal portion of A */ 5059 for (j=jo; j<ncols_o; j++) { 5060 cj[k] = cmap[*bj++]; 5061 ca[k++] = *ba++; 5062 } 5063 } 5064 /* put together the new matrix */ 5065 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5066 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5067 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5068 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5069 mat->free_a = PETSC_TRUE; 5070 mat->free_ij = PETSC_TRUE; 5071 mat->nonew = 0; 5072 } else if (scall == MAT_REUSE_MATRIX) { 5073 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5074 ci = mat->i; cj = mat->j; cam = mat->a; 5075 for (i=0; i<am; i++) { 5076 /* off-diagonal portion of A */ 5077 ncols_o = bi[i+1] - bi[i]; 5078 for (jo=0; jo<ncols_o; jo++) { 5079 col = cmap[*bj]; 5080 if (col >= cstart) break; 5081 *cam++ = *ba++; bj++; 5082 } 5083 /* diagonal portion of A */ 5084 ncols_d = ai[i+1] - ai[i]; 5085 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5086 /* off-diagonal portion of A */ 5087 for (j=jo; j<ncols_o; j++) { 5088 *cam++ = *ba++; bj++; 5089 } 5090 } 5091 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5092 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5093 PetscFunctionReturn(0); 5094 } 5095 5096 /*@C 5097 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5098 5099 Not Collective 5100 5101 Input Parameters: 
5102 + A - the matrix 5103 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5104 - row, col - index sets of rows and columns to extract (or NULL) 5105 5106 Output Parameter: 5107 . A_loc - the local sequential matrix generated 5108 5109 Level: developer 5110 5111 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5112 5113 @*/ 5114 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5115 { 5116 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5117 PetscErrorCode ierr; 5118 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5119 IS isrowa,iscola; 5120 Mat *aloc; 5121 PetscBool match; 5122 5123 PetscFunctionBegin; 5124 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5125 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5126 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5127 if (!row) { 5128 start = A->rmap->rstart; end = A->rmap->rend; 5129 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5130 } else { 5131 isrowa = *row; 5132 } 5133 if (!col) { 5134 start = A->cmap->rstart; 5135 cmap = a->garray; 5136 nzA = a->A->cmap->n; 5137 nzB = a->B->cmap->n; 5138 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5139 ncols = 0; 5140 for (i=0; i<nzB; i++) { 5141 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5142 else break; 5143 } 5144 imark = i; 5145 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5146 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5147 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5148 } else { 5149 iscola = *col; 5150 } 5151 if (scall != MAT_INITIAL_MATRIX) { 5152 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5153 aloc[0] = *A_loc; 5154 } 5155 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5156 if (!col) { /* attach global id of condensed columns */ 5157 ierr = 
PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr); 5158 } 5159 *A_loc = aloc[0]; 5160 ierr = PetscFree(aloc);CHKERRQ(ierr); 5161 if (!row) { 5162 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5163 } 5164 if (!col) { 5165 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5166 } 5167 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5168 PetscFunctionReturn(0); 5169 } 5170 5171 /*@C 5172 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5173 5174 Collective on Mat 5175 5176 Input Parameters: 5177 + A,B - the matrices in mpiaij format 5178 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5179 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5180 5181 Output Parameter: 5182 + rowb, colb - index sets of rows and columns of B to extract 5183 - B_seq - the sequential matrix generated 5184 5185 Level: developer 5186 5187 @*/ 5188 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5189 { 5190 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5191 PetscErrorCode ierr; 5192 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5193 IS isrowb,iscolb; 5194 Mat *bseq=NULL; 5195 5196 PetscFunctionBegin; 5197 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5198 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5199 } 5200 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5201 5202 if (scall == MAT_INITIAL_MATRIX) { 5203 start = A->cmap->rstart; 5204 cmap = a->garray; 5205 nzA = a->A->cmap->n; 5206 nzB = a->B->cmap->n; 5207 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5208 ncols = 0; 5209 for (i=0; i<nzB; i++) { /* row < local row index */ 5210 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5211 else break; 5212 } 5213 imark = i; 5214 for 
(i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5215 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5216 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5217 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5218 } else { 5219 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5220 isrowb = *rowb; iscolb = *colb; 5221 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5222 bseq[0] = *B_seq; 5223 } 5224 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5225 *B_seq = bseq[0]; 5226 ierr = PetscFree(bseq);CHKERRQ(ierr); 5227 if (!rowb) { 5228 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5229 } else { 5230 *rowb = isrowb; 5231 } 5232 if (!colb) { 5233 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5234 } else { 5235 *colb = iscolb; 5236 } 5237 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5238 PetscFunctionReturn(0); 5239 } 5240 5241 /* 5242 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5243 of the OFF-DIAGONAL portion of local A 5244 5245 Collective on Mat 5246 5247 Input Parameters: 5248 + A,B - the matrices in mpiaij format 5249 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5250 5251 Output Parameter: 5252 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5253 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5254 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5255 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5256 5257 Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product 5258 for this matrix. This is not desirable.. 
5259 5260 Level: developer 5261 5262 */ 5263 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5264 { 5265 PetscErrorCode ierr; 5266 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5267 Mat_SeqAIJ *b_oth; 5268 VecScatter ctx; 5269 MPI_Comm comm; 5270 const PetscMPIInt *rprocs,*sprocs; 5271 const PetscInt *srow,*rstarts,*sstarts; 5272 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5273 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = 0,*sstartsj,len; 5274 PetscScalar *b_otha,*bufa,*bufA,*vals; 5275 MPI_Request *rwaits = NULL,*swaits = NULL; 5276 MPI_Status rstatus; 5277 PetscMPIInt jj,size,tag,rank,nsends_mpi,nrecvs_mpi; 5278 5279 PetscFunctionBegin; 5280 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5281 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5282 5283 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5284 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5285 } 5286 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5287 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5288 5289 if (size == 1) { 5290 startsj_s = NULL; 5291 bufa_ptr = NULL; 5292 *B_oth = NULL; 5293 PetscFunctionReturn(0); 5294 } 5295 5296 ctx = a->Mvctx; 5297 tag = ((PetscObject)ctx)->tag; 5298 5299 if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use"); 5300 ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5301 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5302 ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not 
needed*/,&rprocs,&rbs);CHKERRQ(ierr); 5303 ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr); 5304 ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr); 5305 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5306 5307 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5308 if (scall == MAT_INITIAL_MATRIX) { 5309 /* i-array */ 5310 /*---------*/ 5311 /* post receives */ 5312 if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */ 5313 for (i=0; i<nrecvs; i++) { 5314 rowlen = rvalues + rstarts[i]*rbs; 5315 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5316 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5317 } 5318 5319 /* pack the outgoing message */ 5320 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5321 5322 sstartsj[0] = 0; 5323 rstartsj[0] = 0; 5324 len = 0; /* total length of j or a array to be sent */ 5325 if (nsends) { 5326 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5327 ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr); 5328 } 5329 for (i=0; i<nsends; i++) { 5330 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5331 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5332 for (j=0; j<nrows; j++) { 5333 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5334 for (l=0; l<sbs; l++) { 5335 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5336 5337 rowlen[j*sbs+l] = ncols; 5338 5339 len += ncols; 5340 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5341 } 5342 k++; 5343 } 5344 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5345 5346 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5347 } 5348 /* recvs and sends of i-array are completed */ 5349 i = nrecvs; 5350 
while (i--) { 5351 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5352 } 5353 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5354 ierr = PetscFree(svalues);CHKERRQ(ierr); 5355 5356 /* allocate buffers for sending j and a arrays */ 5357 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5358 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5359 5360 /* create i-array of B_oth */ 5361 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5362 5363 b_othi[0] = 0; 5364 len = 0; /* total length of j or a array to be received */ 5365 k = 0; 5366 for (i=0; i<nrecvs; i++) { 5367 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 5368 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 5369 for (j=0; j<nrows; j++) { 5370 b_othi[k+1] = b_othi[k] + rowlen[j]; 5371 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5372 k++; 5373 } 5374 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5375 } 5376 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5377 5378 /* allocate space for j and a arrrays of B_oth */ 5379 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5380 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5381 5382 /* j-array */ 5383 /*---------*/ 5384 /* post receives of j-array */ 5385 for (i=0; i<nrecvs; i++) { 5386 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5387 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5388 } 5389 5390 /* pack the outgoing message j-array */ 5391 if (nsends) k = sstarts[0]; 5392 for (i=0; i<nsends; i++) { 5393 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5394 bufJ = bufj+sstartsj[i]; 5395 for (j=0; j<nrows; j++) { 5396 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5397 for (ll=0; ll<sbs; ll++) { 5398 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5399 for (l=0; l<ncols; l++) { 5400 *bufJ++ = cols[l]; 5401 } 
5402 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5403 } 5404 } 5405 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5406 } 5407 5408 /* recvs and sends of j-array are completed */ 5409 i = nrecvs; 5410 while (i--) { 5411 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5412 } 5413 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5414 } else if (scall == MAT_REUSE_MATRIX) { 5415 sstartsj = *startsj_s; 5416 rstartsj = *startsj_r; 5417 bufa = *bufa_ptr; 5418 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5419 b_otha = b_oth->a; 5420 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5421 5422 /* a-array */ 5423 /*---------*/ 5424 /* post receives of a-array */ 5425 for (i=0; i<nrecvs; i++) { 5426 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5427 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5428 } 5429 5430 /* pack the outgoing message a-array */ 5431 if (nsends) k = sstarts[0]; 5432 for (i=0; i<nsends; i++) { 5433 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5434 bufA = bufa+sstartsj[i]; 5435 for (j=0; j<nrows; j++) { 5436 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5437 for (ll=0; ll<sbs; ll++) { 5438 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5439 for (l=0; l<ncols; l++) { 5440 *bufA++ = vals[l]; 5441 } 5442 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5443 } 5444 } 5445 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5446 } 5447 /* recvs and sends of a-array are completed */ 5448 i = nrecvs; 5449 while (i--) { 5450 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5451 } 5452 if (nsends) {ierr = 
MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5453 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5454 5455 if (scall == MAT_INITIAL_MATRIX) { 5456 /* put together the new matrix */ 5457 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5458 5459 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5460 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5461 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5462 b_oth->free_a = PETSC_TRUE; 5463 b_oth->free_ij = PETSC_TRUE; 5464 b_oth->nonew = 0; 5465 5466 ierr = PetscFree(bufj);CHKERRQ(ierr); 5467 if (!startsj_s || !bufa_ptr) { 5468 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5469 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5470 } else { 5471 *startsj_s = sstartsj; 5472 *startsj_r = rstartsj; 5473 *bufa_ptr = bufa; 5474 } 5475 } 5476 5477 ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5478 ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr); 5479 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5480 PetscFunctionReturn(0); 5481 } 5482 5483 /*@C 5484 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5485 5486 Not Collective 5487 5488 Input Parameters: 5489 . A - The matrix in mpiaij format 5490 5491 Output Parameter: 5492 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5493 . 
colmap - A map from global column index to local index into lvec 5494 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5495 5496 Level: developer 5497 5498 @*/ 5499 #if defined(PETSC_USE_CTABLE) 5500 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5501 #else 5502 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5503 #endif 5504 { 5505 Mat_MPIAIJ *a; 5506 5507 PetscFunctionBegin; 5508 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5509 PetscValidPointer(lvec, 2); 5510 PetscValidPointer(colmap, 3); 5511 PetscValidPointer(multScatter, 4); 5512 a = (Mat_MPIAIJ*) A->data; 5513 if (lvec) *lvec = a->lvec; 5514 if (colmap) *colmap = a->colmap; 5515 if (multScatter) *multScatter = a->Mvctx; 5516 PetscFunctionReturn(0); 5517 } 5518 5519 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5520 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5521 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*); 5522 #if defined(PETSC_HAVE_MKL_SPARSE) 5523 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5524 #endif 5525 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5526 #if defined(PETSC_HAVE_ELEMENTAL) 5527 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5528 #endif 5529 #if defined(PETSC_HAVE_HYPRE) 5530 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5531 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*); 5532 #endif 5533 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 5534 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5535 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*); 5536 
5537 /* 5538 Computes (B'*A')' since computing B*A directly is untenable 5539 5540 n p p 5541 ( ) ( ) ( ) 5542 m ( A ) * n ( B ) = m ( C ) 5543 ( ) ( ) ( ) 5544 5545 */ 5546 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5547 { 5548 PetscErrorCode ierr; 5549 Mat At,Bt,Ct; 5550 5551 PetscFunctionBegin; 5552 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5553 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5554 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5555 ierr = MatDestroy(&At);CHKERRQ(ierr); 5556 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5557 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5558 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5559 PetscFunctionReturn(0); 5560 } 5561 5562 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5563 { 5564 PetscErrorCode ierr; 5565 PetscInt m=A->rmap->n,n=B->cmap->n; 5566 Mat Cmat; 5567 5568 PetscFunctionBegin; 5569 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5570 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5571 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5572 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5573 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5574 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5575 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5576 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5577 5578 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5579 5580 *C = Cmat; 5581 PetscFunctionReturn(0); 5582 } 5583 5584 /* ----------------------------------------------------------------*/ 5585 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5586 { 5587 PetscErrorCode ierr; 5588 5589 PetscFunctionBegin; 5590 if (scall == 
MAT_INITIAL_MATRIX) { 5591 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5592 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5593 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5594 } 5595 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5596 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5597 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5598 PetscFunctionReturn(0); 5599 } 5600 5601 /*MC 5602 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5603 5604 Options Database Keys: 5605 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5606 5607 Level: beginner 5608 5609 .seealso: MatCreateAIJ() 5610 M*/ 5611 5612 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5613 { 5614 Mat_MPIAIJ *b; 5615 PetscErrorCode ierr; 5616 PetscMPIInt size; 5617 5618 PetscFunctionBegin; 5619 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5620 5621 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5622 B->data = (void*)b; 5623 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5624 B->assembled = PETSC_FALSE; 5625 B->insertmode = NOT_SET_VALUES; 5626 b->size = size; 5627 5628 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5629 5630 /* build cache for off array entries formed */ 5631 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5632 5633 b->donotstash = PETSC_FALSE; 5634 b->colmap = 0; 5635 b->garray = 0; 5636 b->roworiented = PETSC_TRUE; 5637 5638 /* stuff used for matrix vector multiply */ 5639 b->lvec = NULL; 5640 b->Mvctx = NULL; 5641 5642 /* stuff for MatGetRow() */ 5643 b->rowindices = 0; 5644 b->rowvalues = 0; 5645 b->getrowactive = PETSC_FALSE; 5646 5647 /* flexible pointer used in CUSP/CUSPARSE classes */ 5648 b->spptr = NULL; 5649 5650 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 5651 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5652 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5653 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5654 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5655 ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr); 5656 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5657 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5658 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5659 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr); 5660 #if defined(PETSC_HAVE_MKL_SPARSE) 5661 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr); 5662 #endif 5663 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5664 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5665 #if defined(PETSC_HAVE_ELEMENTAL) 5666 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 5667 #endif 5668 #if defined(PETSC_HAVE_HYPRE) 5669 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 5670 #endif 5671 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 5672 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr); 5673 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5674 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5675 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5676 #if defined(PETSC_HAVE_HYPRE) 5677 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr); 5678 #endif 5679 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr); 5680 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5681 PetscFunctionReturn(0); 5682 } 5683 5684 /*@C 5685 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5686 and "off-diagonal" part of the matrix in CSR format. 5687 5688 Collective 5689 5690 Input Parameters: 5691 + comm - MPI communicator 5692 . m - number of local rows (Cannot be PETSC_DECIDE) 5693 . n - This value should be the same as the local size used in creating the 5694 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5695 calculated if N is given) For square matrices n is almost always m. 5696 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5697 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5698 . 
i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 5699 . j - column indices 5700 . a - matrix values 5701 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 5702 . oj - column indices 5703 - oa - matrix values 5704 5705 Output Parameter: 5706 . mat - the matrix 5707 5708 Level: advanced 5709 5710 Notes: 5711 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5712 must free the arrays once the matrix has been destroyed and not before. 5713 5714 The i and j indices are 0 based 5715 5716 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5717 5718 This sets local rows and cannot be used to set off-processor values. 5719 5720 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5721 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5722 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5723 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5724 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5725 communication if it is known that only local entries will be set. 
5726 5727 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5728 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5729 @*/ 5730 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5731 { 5732 PetscErrorCode ierr; 5733 Mat_MPIAIJ *maij; 5734 5735 PetscFunctionBegin; 5736 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5737 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5738 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5739 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5740 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5741 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5742 maij = (Mat_MPIAIJ*) (*mat)->data; 5743 5744 (*mat)->preallocated = PETSC_TRUE; 5745 5746 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5747 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5748 5749 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5750 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5751 5752 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5753 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5754 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5755 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5756 5757 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 5758 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5759 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5760 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 5761 ierr = 
MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5762 PetscFunctionReturn(0); 5763 } 5764 5765 /* 5766 Special version for direct calls from Fortran 5767 */ 5768 #include <petsc/private/fortranimpl.h> 5769 5770 /* Change these macros so can be used in void function */ 5771 #undef CHKERRQ 5772 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5773 #undef SETERRQ2 5774 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5775 #undef SETERRQ3 5776 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5777 #undef SETERRQ 5778 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5779 5780 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5781 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5782 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5783 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5784 #else 5785 #endif 5786 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5787 { 5788 Mat mat = *mmat; 5789 PetscInt m = *mm, n = *mn; 5790 InsertMode addv = *maddv; 5791 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5792 PetscScalar value; 5793 PetscErrorCode ierr; 5794 5795 MatCheckPreallocated(mat,1); 5796 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5797 5798 #if defined(PETSC_USE_DEBUG) 5799 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5800 #endif 5801 { 5802 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5803 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5804 PetscBool roworiented = aij->roworiented; 5805 5806 /* Some Variables required in the macro */ 5807 Mat A = aij->A; 5808 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5809 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5810 MatScalar *aa = a->a; 5811 PetscBool ignorezeroentries = 
(((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5812 Mat B = aij->B; 5813 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5814 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5815 MatScalar *ba = b->a; 5816 5817 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5818 PetscInt nonew = a->nonew; 5819 MatScalar *ap1,*ap2; 5820 5821 PetscFunctionBegin; 5822 for (i=0; i<m; i++) { 5823 if (im[i] < 0) continue; 5824 #if defined(PETSC_USE_DEBUG) 5825 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5826 #endif 5827 if (im[i] >= rstart && im[i] < rend) { 5828 row = im[i] - rstart; 5829 lastcol1 = -1; 5830 rp1 = aj + ai[row]; 5831 ap1 = aa + ai[row]; 5832 rmax1 = aimax[row]; 5833 nrow1 = ailen[row]; 5834 low1 = 0; 5835 high1 = nrow1; 5836 lastcol2 = -1; 5837 rp2 = bj + bi[row]; 5838 ap2 = ba + bi[row]; 5839 rmax2 = bimax[row]; 5840 nrow2 = bilen[row]; 5841 low2 = 0; 5842 high2 = nrow2; 5843 5844 for (j=0; j<n; j++) { 5845 if (roworiented) value = v[i*n+j]; 5846 else value = v[i+j*m]; 5847 if (in[j] >= cstart && in[j] < cend) { 5848 col = in[j] - cstart; 5849 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 5850 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 5851 } else if (in[j] < 0) continue; 5852 #if defined(PETSC_USE_DEBUG) 5853 /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */ 5854 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 5855 #endif 5856 else { 5857 if (mat->was_assembled) { 5858 if (!aij->colmap) { 5859 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5860 } 5861 #if defined(PETSC_USE_CTABLE) 5862 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 
5863 col--; 5864 #else 5865 col = aij->colmap[in[j]] - 1; 5866 #endif 5867 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 5868 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5869 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5870 col = in[j]; 5871 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5872 B = aij->B; 5873 b = (Mat_SeqAIJ*)B->data; 5874 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5875 rp2 = bj + bi[row]; 5876 ap2 = ba + bi[row]; 5877 rmax2 = bimax[row]; 5878 nrow2 = bilen[row]; 5879 low2 = 0; 5880 high2 = nrow2; 5881 bm = aij->B->rmap->n; 5882 ba = b->a; 5883 } 5884 } else col = in[j]; 5885 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 5886 } 5887 } 5888 } else if (!aij->donotstash) { 5889 if (roworiented) { 5890 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5891 } else { 5892 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5893 } 5894 } 5895 } 5896 } 5897 PetscFunctionReturnVoid(); 5898 } 5899