#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/vecscatterimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity (a short usage sketch appears in a comment below).

   Options Database Keys:
.  -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes:
   Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL;
   the type also automatically switches over to use inodes when enough of them exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
.  -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j]
!= 0.0) { 112 rows[cnt++] = rstart + i; 113 goto ok2; 114 } 115 } 116 ok2:; 117 } 118 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 119 PetscFunctionReturn(0); 120 } 121 122 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 123 { 124 PetscErrorCode ierr; 125 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 126 PetscBool cong; 127 128 PetscFunctionBegin; 129 ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr); 130 if (Y->assembled && cong) { 131 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 132 } else { 133 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 134 } 135 PetscFunctionReturn(0); 136 } 137 138 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 139 { 140 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 141 PetscErrorCode ierr; 142 PetscInt i,rstart,nrows,*rows; 143 144 PetscFunctionBegin; 145 *zrows = NULL; 146 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 147 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 148 for (i=0; i<nrows; i++) rows[i] += rstart; 149 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 150 PetscFunctionReturn(0); 151 } 152 153 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 154 { 155 PetscErrorCode ierr; 156 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 157 PetscInt i,n,*garray = aij->garray; 158 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 159 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 160 PetscReal *work; 161 162 PetscFunctionBegin; 163 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 164 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 165 if (type == NORM_2) { 166 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 167 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 168 } 169 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 170 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 171 } 172 } else if (type == NORM_1) { 173 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 174 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 175 } 176 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 177 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 178 } 179 } else if (type == NORM_INFINITY) { 180 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 181 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 182 } 183 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 184 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 185 } 186 187 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 188 if (type == NORM_INFINITY) { 189 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 190 } else { 191 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 192 } 193 ierr = PetscFree(work);CHKERRQ(ierr); 194 if (type == NORM_2) { 195 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 196 } 197 PetscFunctionReturn(0); 198 } 199 200 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 201 { 202 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 203 IS sis,gis; 204 PetscErrorCode ierr; 205 const PetscInt *isis,*igis; 206 PetscInt n,*iis,nsis,ngis,rstart,i; 207 208 PetscFunctionBegin; 209 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 210 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 211 
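  /*
     A minimal usage sketch of the preallocation pattern recommended in the MATAIJ manual page near
     the top of this file: calling both the Seq and MPI preallocation routines lets the same code run
     with one process or with many. The sizes and the nonzero counts (5 per diagonal row, 2 per
     off-diagonal row) are illustrative placeholders, not values taken from this file.

       Mat X;
       ierr = MatCreate(comm,&X);CHKERRQ(ierr);
       ierr = MatSetSizes(X,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
       ierr = MatSetType(X,MATAIJ);CHKERRQ(ierr);
       ierr = MatSeqAIJSetPreallocation(X,5,NULL);CHKERRQ(ierr);
       ierr = MatMPIAIJSetPreallocation(X,5,NULL,2,NULL);CHKERRQ(ierr);
  */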
ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 212 ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr); 213 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 214 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 215 216 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 217 ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr); 218 ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr); 219 n = ngis + nsis; 220 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 221 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 222 for (i=0; i<n; i++) iis[i] += rstart; 223 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 224 225 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 226 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 227 ierr = ISDestroy(&sis);CHKERRQ(ierr); 228 ierr = ISDestroy(&gis);CHKERRQ(ierr); 229 PetscFunctionReturn(0); 230 } 231 232 /* 233 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 234 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 235 236 Only for square matrices 237 238 Used by a preconditioner, hence PETSC_EXTERN 239 */ 240 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 241 { 242 PetscMPIInt rank,size; 243 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 244 PetscErrorCode ierr; 245 Mat mat; 246 Mat_SeqAIJ *gmata; 247 PetscMPIInt tag; 248 MPI_Status status; 249 PetscBool aij; 250 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 251 252 PetscFunctionBegin; 253 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 254 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 255 if (!rank) { 256 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 257 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 258 } 259 if (reuse == MAT_INITIAL_MATRIX) { 260 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 261 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 262 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 263 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 264 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 265 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 266 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 267 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 268 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 269 270 rowners[0] = 0; 271 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 272 rstart = rowners[rank]; 273 rend = rowners[rank+1]; 274 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 275 if (!rank) { 276 gmata = (Mat_SeqAIJ*) gmat->data; 277 /* send row lengths to all processors */ 278 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 279 for (i=1; i<size; i++) { 280 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 281 } 282 /* determine number diagonal and off-diagonal counts */ 283 ierr = PetscArrayzero(olens,m);CHKERRQ(ierr); 284 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 285 jj = 0; 286 for (i=0; i<m; i++) { 287 for (j=0; j<dlens[i]; j++) { 288 if (gmata->j[jj] < rstart) ld[i]++; 289 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 290 jj++; 291 } 292 } 293 /* send column indices to other processes */ 294 for (i=1; i<size; i++) { 295 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 296 ierr = 
MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 297 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 298 } 299 300 /* send numerical values to other processes */ 301 for (i=1; i<size; i++) { 302 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 303 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 304 } 305 gmataa = gmata->a; 306 gmataj = gmata->j; 307 308 } else { 309 /* receive row lengths */ 310 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 311 /* receive column indices */ 312 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 313 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 314 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 315 /* determine number diagonal and off-diagonal counts */ 316 ierr = PetscArrayzero(olens,m);CHKERRQ(ierr); 317 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 318 jj = 0; 319 for (i=0; i<m; i++) { 320 for (j=0; j<dlens[i]; j++) { 321 if (gmataj[jj] < rstart) ld[i]++; 322 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 323 jj++; 324 } 325 } 326 /* receive numerical values */ 327 ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr); 328 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 329 } 330 /* set preallocation */ 331 for (i=0; i<m; i++) { 332 dlens[i] -= olens[i]; 333 } 334 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 335 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 336 337 for (i=0; i<m; i++) { 338 dlens[i] += olens[i]; 339 } 340 cnt = 0; 341 for (i=0; i<m; i++) { 342 row = rstart + i; 343 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 344 cnt += dlens[i]; 345 } 346 if (rank) { 347 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 348 } 349 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 350 ierr = PetscFree(rowners);CHKERRQ(ierr); 351 352 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 353 354 *inmat = mat; 355 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 356 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 357 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 358 mat = *inmat; 359 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 360 if (!rank) { 361 /* send numerical values to other processes */ 362 gmata = (Mat_SeqAIJ*) gmat->data; 363 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 364 gmataa = gmata->a; 365 for (i=1; i<size; i++) { 366 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 367 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 368 } 369 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 370 } else { 371 /* receive numerical values from process 0*/ 372 nz = Ad->nz + Ao->nz; 373 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 374 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 375 } 376 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 377 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 378 ad = Ad->a; 379 ao = Ao->a; 380 if (mat->rmap->n) { 381 i = 0; 382 nz = ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz; 383 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz; 384 } 385 for (i=1; i<mat->rmap->n; i++) { 386 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = 
PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it, it is not scalable (each process
  has an order-N integer array) but it is fast to access.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure whether PetscLogFlops() will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } \
        else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr); \
    ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr); \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
  }

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) {
\ 481 ap2[_i] += value; \ 482 (void)PetscLogFlops(1.0); \ 483 } \ 484 else ap2[_i] = value; \ 485 goto b_noinsert; \ 486 } \ 487 } \ 488 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 489 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 490 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 491 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 492 N = nrow2++ - 1; b->nz++; high2++; \ 493 /* shift up all the later entries in this row */ \ 494 ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\ 495 ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\ 496 rp2[_i] = col; \ 497 ap2[_i] = value; \ 498 B->nonzerostate++; \ 499 b_noinsert: ; \ 500 bilen[row] = nrow2; \ 501 } 502 503 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 504 { 505 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 506 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 507 PetscErrorCode ierr; 508 PetscInt l,*garray = mat->garray,diag; 509 510 PetscFunctionBegin; 511 /* code only works for square matrices A */ 512 513 /* find size of row to the left of the diagonal part */ 514 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 515 row = row - diag; 516 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 517 if (garray[b->j[b->i[row]+l]] > diag) break; 518 } 519 ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr); 520 521 /* diagonal part */ 522 ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr); 523 524 /* right of diagonal part */ 525 ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr); 526 PetscFunctionReturn(0); 527 } 528 529 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 530 { 531 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 532 PetscScalar value; 533 PetscErrorCode ierr; 534 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 535 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 536 PetscBool roworiented = aij->roworiented; 537 538 /* Some Variables required in the macro */ 539 Mat A = aij->A; 540 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 541 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 542 MatScalar *aa = a->a; 543 PetscBool ignorezeroentries = a->ignorezeroentries; 544 Mat B = aij->B; 545 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 546 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 547 MatScalar *ba = b->a; 548 549 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 550 PetscInt nonew; 551 MatScalar *ap1,*ap2; 552 553 PetscFunctionBegin; 554 for (i=0; i<m; i++) { 555 if (im[i] < 0) continue; 556 #if defined(PETSC_USE_DEBUG) 557 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 558 #endif 559 if (im[i] >= rstart && im[i] < rend) { 560 row = im[i] - rstart; 561 lastcol1 = -1; 562 rp1 = aj + ai[row]; 563 ap1 = aa + ai[row]; 564 rmax1 = aimax[row]; 565 nrow1 = ailen[row]; 566 low1 = 0; 567 high1 = nrow1; 568 lastcol2 = -1; 569 rp2 = bj + bi[row]; 570 ap2 = ba + bi[row]; 571 rmax2 = bimax[row]; 572 nrow2 = bilen[row]; 573 low2 = 0; 574 high2 = nrow2; 575 576 for (j=0; j<n; j++) { 
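        /* Classify each column index below: global columns in [cstart,cend) belong to the diagonal
           block A and are converted to local indices, all other (off-process) columns go into the
           off-diagonal block B; once the matrix has been assembled, that translation uses the
           colmap/garray mapping built by MatCreateColmap_MPIAIJ_Private(), otherwise the global
           column index is stored directly. */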
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) {
              if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
                ierr = PetscInfo3(mat,"Skipping insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
              } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
            }
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

/*
  This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-process parts of the matrix are allowed here, and mat->was_assembled has to be PETSC_FALSE.
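  As a small worked example of the splitting done below (the numbers are illustrative): with
  cstart = 4 and cend = 8, a row with sorted global columns {1, 5, 7, 9} is split into the diagonal
  part {5-4, 7-4} = {1, 3} stored in aj, and the off-diagonal part {1, 9} kept with global column
  indices in bj, so ailen receives dnz = 2 and bilen receives onz = 2 for that row.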
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  Mat        A    = aij->A; /* diagonal part of the matrix */
  Mat        B    = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ *a   = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b   = (Mat_SeqAIJ*)B->data;
  PetscInt   cstart = mat->cmap->rstart,cend = mat->cmap->rend,col;
  PetscInt   *ailen = a->ilen,*aj = a->j;
  PetscInt   *bilen = b->ilen,*bj = b->j;
  PetscInt   am     = aij->A->rmap->n,j;
  PetscInt   diag_so_far = 0,dnz;
  PetscInt   offd_so_far = 0,onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(0);
}

/*
  This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-process parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
  Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
  would not be true and the more complex MatSetValues_MPIAIJ has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
{
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ*)mat->data;
  Mat        A     = aij->A; /* diagonal part of the matrix */
  Mat        B     = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ *aijd = (Mat_SeqAIJ*)(aij->A)->data,*aijo = (Mat_SeqAIJ*)(aij->B)->data;
  Mat_SeqAIJ *a    = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b    = (Mat_SeqAIJ*)B->data;
  PetscInt   cstart = mat->cmap->rstart,cend = mat->cmap->rend;
  PetscInt   *ailen = a->ilen,*aj = a->j;
  PetscInt   *bilen = b->ilen,*bj = b->j;
  PetscInt   am     = aij->A->rmap->n,j;
  PetscInt   *full_diag_i = aijd->i,*full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point.
*/ 699 PetscInt col,dnz_row,onz_row,rowstart_diag,rowstart_offd; 700 PetscScalar *aa = a->a,*ba = b->a; 701 702 PetscFunctionBegin; 703 /* Iterate over all rows of the matrix */ 704 for (j=0; j<am; j++) { 705 dnz_row = onz_row = 0; 706 rowstart_offd = full_offd_i[j]; 707 rowstart_diag = full_diag_i[j]; 708 /* Iterate over all non-zero columns of the current row */ 709 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 710 /* If column is in the diagonal */ 711 if (mat_j[col] >= cstart && mat_j[col] < cend) { 712 aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 713 aa[rowstart_diag+dnz_row] = mat_a[col]; 714 dnz_row++; 715 } else { /* off-diagonal entries */ 716 bj[rowstart_offd+onz_row] = mat_j[col]; 717 ba[rowstart_offd+onz_row] = mat_a[col]; 718 onz_row++; 719 } 720 } 721 ailen[j] = dnz_row; 722 bilen[j] = onz_row; 723 } 724 PetscFunctionReturn(0); 725 } 726 727 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 728 { 729 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 730 PetscErrorCode ierr; 731 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 732 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 733 734 PetscFunctionBegin; 735 for (i=0; i<m; i++) { 736 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 737 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 738 if (idxm[i] >= rstart && idxm[i] < rend) { 739 row = idxm[i] - rstart; 740 for (j=0; j<n; j++) { 741 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 742 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 743 if (idxn[j] >= cstart && idxn[j] < cend) { 744 col = idxn[j] - cstart; 745 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 746 } else { 747 if (!aij->colmap) { 748 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 749 } 750 #if defined(PETSC_USE_CTABLE) 751 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 752 col--; 753 #else 754 col = aij->colmap[idxn[j]] - 1; 755 #endif 756 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 757 else { 758 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 759 } 760 } 761 } 762 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 763 } 764 PetscFunctionReturn(0); 765 } 766 767 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 768 769 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 770 { 771 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 772 PetscErrorCode ierr; 773 PetscInt nstash,reallocs; 774 775 PetscFunctionBegin; 776 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 777 778 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 779 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 780 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 781 PetscFunctionReturn(0); 782 } 783 784 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 785 { 786 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 787 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 788 PetscErrorCode ierr; 789 PetscMPIInt n; 790 PetscInt i,j,rstart,ncols,flg; 791 PetscInt *row,*col; 792 
PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this step
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = 0;

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ       *mat = (Mat_MPIAIJ *) A->data;
  PetscObjectState sA, sB;
  PetscInt         *lrows;
  PetscInt         r, len;
  PetscBool        cong, lch, gch;
  PetscErrorCode   ierr;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
  ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    ierr = VecGetArrayRead(x,
&xx);CHKERRQ(ierr); 886 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 887 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 888 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 889 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 890 } 891 892 sA = mat->A->nonzerostate; 893 sB = mat->B->nonzerostate; 894 895 if (diag != 0.0 && cong) { 896 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 897 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 898 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 899 Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data; 900 Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data; 901 PetscInt nnwA, nnwB; 902 PetscBool nnzA, nnzB; 903 904 nnwA = aijA->nonew; 905 nnwB = aijB->nonew; 906 nnzA = aijA->keepnonzeropattern; 907 nnzB = aijB->keepnonzeropattern; 908 if (!nnzA) { 909 ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr); 910 aijA->nonew = 0; 911 } 912 if (!nnzB) { 913 ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr); 914 aijB->nonew = 0; 915 } 916 /* Must zero here before the next loop */ 917 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 918 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 919 for (r = 0; r < len; ++r) { 920 const PetscInt row = lrows[r] + A->rmap->rstart; 921 if (row >= A->cmap->N) continue; 922 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 923 } 924 aijA->nonew = nnwA; 925 aijB->nonew = nnwB; 926 } else { 927 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 928 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 929 } 930 ierr = PetscFree(lrows);CHKERRQ(ierr); 931 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 932 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 933 934 /* reduce nonzerostate */ 935 lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate); 936 ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 937 if (gch) A->nonzerostate++; 938 PetscFunctionReturn(0); 939 } 940 941 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 942 { 943 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 944 PetscErrorCode ierr; 945 PetscMPIInt n = A->rmap->n; 946 PetscInt i,j,r,m,p = 0,len = 0; 947 PetscInt *lrows,*owners = A->rmap->range; 948 PetscSFNode *rrows; 949 PetscSF sf; 950 const PetscScalar *xx; 951 PetscScalar *bb,*mask; 952 Vec xmask,lmask; 953 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 954 const PetscInt *aj, *ii,*ridx; 955 PetscScalar *aa; 956 957 PetscFunctionBegin; 958 /* Create SF where leaves are input rows and roots are owned rows */ 959 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 960 for (r = 0; r < n; ++r) lrows[r] = -1; 961 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 962 for (r = 0; r < N; ++r) { 963 const PetscInt idx = rows[r]; 964 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 965 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 966 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 967 } 968 rrows[r].rank = 
p; 969 rrows[r].index = rows[r] - owners[p]; 970 } 971 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 972 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 973 /* Collect flags for rows to be zeroed */ 974 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 975 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 976 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 977 /* Compress and put in row numbers */ 978 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 979 /* zero diagonal part of matrix */ 980 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 981 /* handle off diagonal part of matrix */ 982 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 983 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 984 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 985 for (i=0; i<len; i++) bb[lrows[i]] = 1; 986 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 987 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 988 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 989 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 990 if (x && b) { /* this code is buggy when the row and column layout don't match */ 991 PetscBool cong; 992 993 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 994 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 995 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 996 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 997 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 998 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 999 } 1000 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1001 /* remove zeroed rows of off diagonal matrix */ 1002 ii = aij->i; 1003 for (i=0; i<len; i++) { 1004 ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr); 1005 } 1006 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1007 if (aij->compressedrow.use) { 1008 m = aij->compressedrow.nrows; 1009 ii = aij->compressedrow.i; 1010 ridx = aij->compressedrow.rindex; 1011 for (i=0; i<m; i++) { 1012 n = ii[i+1] - ii[i]; 1013 aj = aij->j + ii[i]; 1014 aa = aij->a + ii[i]; 1015 1016 for (j=0; j<n; j++) { 1017 if (PetscAbsScalar(mask[*aj])) { 1018 if (b) bb[*ridx] -= *aa*xx[*aj]; 1019 *aa = 0.0; 1020 } 1021 aa++; 1022 aj++; 1023 } 1024 ridx++; 1025 } 1026 } else { /* do not use compressed row format */ 1027 m = l->B->rmap->n; 1028 for (i=0; i<m; i++) { 1029 n = ii[i+1] - ii[i]; 1030 aj = aij->j + ii[i]; 1031 aa = aij->a + ii[i]; 1032 for (j=0; j<n; j++) { 1033 if (PetscAbsScalar(mask[*aj])) { 1034 if (b) bb[i] -= *aa*xx[*aj]; 1035 *aa = 0.0; 1036 } 1037 aa++; 1038 aj++; 1039 } 1040 } 1041 } 1042 if (x && b) { 1043 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1044 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1045 } 1046 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1047 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1048 ierr = PetscFree(lrows);CHKERRQ(ierr); 1049 1050 /* only change matrix nonzero state if pattern was allowed to be changed */ 1051 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 1052 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1053 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1054 } 
1055 PetscFunctionReturn(0); 1056 } 1057 1058 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1059 { 1060 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1061 PetscErrorCode ierr; 1062 PetscInt nt; 1063 VecScatter Mvctx = a->Mvctx; 1064 1065 PetscFunctionBegin; 1066 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1067 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1068 1069 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1070 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1071 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1072 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1073 PetscFunctionReturn(0); 1074 } 1075 1076 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1077 { 1078 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1079 PetscErrorCode ierr; 1080 1081 PetscFunctionBegin; 1082 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1083 PetscFunctionReturn(0); 1084 } 1085 1086 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1087 { 1088 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1089 PetscErrorCode ierr; 1090 VecScatter Mvctx = a->Mvctx; 1091 1092 PetscFunctionBegin; 1093 if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1; 1094 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1095 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1096 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1097 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1098 PetscFunctionReturn(0); 1099 } 1100 1101 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1102 { 1103 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1104 PetscErrorCode ierr; 1105 1106 PetscFunctionBegin; 1107 /* do nondiagonal part */ 1108 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1109 /* do local part */ 1110 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1111 /* add partial results together */ 1112 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1113 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1114 PetscFunctionReturn(0); 1115 } 1116 1117 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1118 { 1119 MPI_Comm comm; 1120 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1121 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1122 IS Me,Notme; 1123 PetscErrorCode ierr; 1124 PetscInt M,N,first,last,*notme,i; 1125 PetscBool lf; 1126 PetscMPIInt size; 1127 1128 PetscFunctionBegin; 1129 /* Easy test: symmetric diagonal block */ 1130 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1131 ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr); 1132 ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr); 1133 if (!*f) PetscFunctionReturn(0); 1134 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1135 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1136 if (size == 1) PetscFunctionReturn(0); 1137 1138 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. 
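     Each process compares the block of Amat formed by its own rows (index set Me) and the columns it
     does not own (index set Notme) against the block of Bmat with rows Notme and columns Me; the two
     sequential blocks must be transposes of each other for the global test to succeed.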
*/
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* add partial results together */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
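  /* garray (the global column indices of the off-diagonal block B), the ghost vector lvec and the
     scatter contexts used by MatMult() are owned by this matrix and are released below */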
1229 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1230 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1231 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1232 if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);} 1233 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1234 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1235 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1236 1237 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1238 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1239 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1240 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1241 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1242 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr); 1243 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1244 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1245 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1246 #if defined(PETSC_HAVE_ELEMENTAL) 1247 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1248 #endif 1249 #if defined(PETSC_HAVE_HYPRE) 1250 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1251 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1252 #endif 1253 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1254 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr); 1255 PetscFunctionReturn(0); 1256 } 1257 1258 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1259 { 1260 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1261 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1262 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1263 PetscErrorCode ierr; 1264 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1265 int fd; 1266 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1267 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1268 PetscScalar *column_values; 1269 PetscInt message_count,flowcontrolcount; 1270 FILE *file; 1271 1272 PetscFunctionBegin; 1273 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1274 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1275 nz = A->nz + B->nz; 1276 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1277 if (!rank) { 1278 header[0] = MAT_FILE_CLASSID; 1279 header[1] = mat->rmap->N; 1280 header[2] = mat->cmap->N; 1281 1282 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1283 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1284 /* get largest number of rows any processor has */ 1285 rlen = mat->rmap->n; 1286 range = mat->rmap->range; 1287 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1288 } else { 1289 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1290 rlen = mat->rmap->n; 1291 } 1292 1293 /* load up the local row counts */ 1294 ierr = 
PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* on the root process this is overwritten below with the largest number of nonzeros on any process */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
    for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if
(garray[B->j[j]] > cstart) break; 1357 column_values[cnt++] = B->a[j]; 1358 } 1359 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1360 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1361 } 1362 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1363 1364 /* store the column values to the file */ 1365 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1366 if (!rank) { 1367 MPI_Status status; 1368 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1369 for (i=1; i<size; i++) { 1370 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1371 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1372 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1373 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1374 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1375 } 1376 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1377 } else { 1378 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1379 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1380 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1381 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1382 } 1383 ierr = PetscFree(column_values);CHKERRQ(ierr); 1384 1385 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1386 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1387 PetscFunctionReturn(0); 1388 } 1389 1390 #include <petscdraw.h> 1391 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1392 { 1393 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1394 PetscErrorCode ierr; 1395 PetscMPIInt rank = aij->rank,size = aij->size; 1396 PetscBool isdraw,iascii,isbinary; 1397 PetscViewer sviewer; 1398 PetscViewerFormat format; 1399 1400 PetscFunctionBegin; 1401 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1402 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1403 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1404 if (iascii) { 1405 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1406 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1407 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1408 ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr); 1409 ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1410 for (i=0; i<(PetscInt)size; i++) { 1411 nmax = PetscMax(nmax,nz[i]); 1412 nmin = PetscMin(nmin,nz[i]); 1413 navg += nz[i]; 1414 } 1415 ierr = PetscFree(nz);CHKERRQ(ierr); 1416 navg = navg/size; 1417 ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr); 1418 PetscFunctionReturn(0); 1419 } 1420 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1421 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1422 MatInfo info; 1423 PetscBool inodes; 1424 1425 ierr = 
MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1426 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1427 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1428 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1429 if (!inodes) { 1430 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n", 1431 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1432 } else { 1433 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n", 1434 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1435 } 1436 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1437 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1438 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1439 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1440 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1441 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1442 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1443 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1444 PetscFunctionReturn(0); 1445 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1446 PetscInt inodecount,inodelimit,*inodes; 1447 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1448 if (inodes) { 1449 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1450 } else { 1451 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1452 } 1453 PetscFunctionReturn(0); 1454 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1455 PetscFunctionReturn(0); 1456 } 1457 } else if (isbinary) { 1458 if (size == 1) { 1459 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1460 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1461 } else { 1462 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1463 } 1464 PetscFunctionReturn(0); 1465 } else if (iascii && size == 1) { 1466 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1467 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1468 PetscFunctionReturn(0); 1469 } else if (isdraw) { 1470 PetscDraw draw; 1471 PetscBool isnull; 1472 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1473 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1474 if (isnull) PetscFunctionReturn(0); 1475 } 1476 1477 { /* assemble the entire matrix onto first processor */ 1478 Mat A = NULL, Av; 1479 IS isrow,iscol; 1480 1481 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1482 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? 
mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1483 ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 1484 ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr); 1485 /* The commented code uses MatCreateSubMatrices instead */ 1486 /* 1487 Mat *AA, A = NULL, Av; 1488 IS isrow,iscol; 1489 1490 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1491 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1492 ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr); 1493 if (!rank) { 1494 ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr); 1495 A = AA[0]; 1496 Av = AA[0]; 1497 } 1498 ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr); 1499 */ 1500 ierr = ISDestroy(&iscol);CHKERRQ(ierr); 1501 ierr = ISDestroy(&isrow);CHKERRQ(ierr); 1502 /* 1503 Everyone has to call to draw the matrix since the graphics waits are 1504 synchronized across all processors that share the PetscDraw object 1505 */ 1506 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1507 if (!rank) { 1508 if (((PetscObject)mat)->name) { 1509 ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr); 1510 } 1511 ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr); 1512 } 1513 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1514 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1515 ierr = MatDestroy(&A);CHKERRQ(ierr); 1516 } 1517 PetscFunctionReturn(0); 1518 } 1519 1520 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1521 { 1522 PetscErrorCode ierr; 1523 PetscBool iascii,isdraw,issocket,isbinary; 1524 1525 PetscFunctionBegin; 1526 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1527 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1528 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1529 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1530 if (iascii || isdraw || isbinary || issocket) { 1531 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1532 } 1533 PetscFunctionReturn(0); 1534 } 1535 1536 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1537 { 1538 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1539 PetscErrorCode ierr; 1540 Vec bb1 = 0; 1541 PetscBool hasop; 1542 1543 PetscFunctionBegin; 1544 if (flag == SOR_APPLY_UPPER) { 1545 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1546 PetscFunctionReturn(0); 1547 } 1548 1549 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1550 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1551 } 1552 1553 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1554 if (flag & SOR_ZERO_INITIAL_GUESS) { 1555 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1556 its--; 1557 } 1558 1559 while (its--) { 1560 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1561 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1562 1563 /* update rhs: bb1 = bb - B*x */ 1564 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1565 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1566 1567 
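      /*
         Note: mat->lvec now holds the scattered ghost (off-process) entries of xx; negating it and
         calling B's multadd forms bb1 = bb - B*x_ghost, so the symmetric sweep below acts only on
         the local diagonal block A (a block Jacobi style local SOR iteration).
      */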
/* local sweep */ 1568 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1569 } 1570 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1571 if (flag & SOR_ZERO_INITIAL_GUESS) { 1572 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1573 its--; 1574 } 1575 while (its--) { 1576 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1577 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1578 1579 /* update rhs: bb1 = bb - B*x */ 1580 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1581 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1582 1583 /* local sweep */ 1584 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1585 } 1586 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1587 if (flag & SOR_ZERO_INITIAL_GUESS) { 1588 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1589 its--; 1590 } 1591 while (its--) { 1592 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1593 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1594 1595 /* update rhs: bb1 = bb - B*x */ 1596 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1597 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1598 1599 /* local sweep */ 1600 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1601 } 1602 } else if (flag & SOR_EISENSTAT) { 1603 Vec xx1; 1604 1605 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1606 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1607 1608 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1609 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1610 if (!mat->diag) { 1611 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1612 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1613 } 1614 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1615 if (hasop) { 1616 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1617 } else { 1618 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1619 } 1620 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1621 1622 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1623 1624 /* local sweep */ 1625 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1626 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1627 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1628 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1629 1630 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1631 1632 matin->factorerrortype = mat->A->factorerrortype; 1633 PetscFunctionReturn(0); 1634 } 1635 1636 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1637 { 1638 Mat aA,aB,Aperm; 1639 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1640 PetscScalar *aa,*ba; 1641 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1642 PetscSF rowsf,sf; 1643 IS parcolp = NULL; 1644 PetscBool done; 1645 PetscErrorCode ierr; 1646 1647 PetscFunctionBegin; 1648 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1649 ierr = 
ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1650 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1651 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1652 1653 /* Invert row permutation to find out where my rows should go */ 1654 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1655 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1656 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1657 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1658 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1659 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1660 1661 /* Invert column permutation to find out where my columns should go */ 1662 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1663 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1664 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1665 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1666 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1667 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1668 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1669 1670 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1671 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1672 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1673 1674 /* Find out where my gcols should go */ 1675 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1676 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1677 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1678 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1679 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1680 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1681 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1682 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1683 1684 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1685 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1686 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1687 for (i=0; i<m; i++) { 1688 PetscInt row = rdest[i],rowner; 1689 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1690 for (j=ai[i]; j<ai[i+1]; j++) { 1691 PetscInt cowner,col = cdest[aj[j]]; 1692 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1693 if (rowner == cowner) dnnz[i]++; 1694 else onnz[i]++; 1695 } 1696 for (j=bi[i]; j<bi[i+1]; j++) { 1697 PetscInt cowner,col = gcdest[bj[j]]; 1698 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1699 if (rowner == cowner) dnnz[i]++; 1700 else onnz[i]++; 1701 } 1702 } 1703 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1704 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1705 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1706 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1707 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1708 1709 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1710 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1711 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1712 for (i=0; i<m; i++) { 1713 
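      /*
         The length-m arrays dnnz and onnz are no longer needed for preallocation, so they are reused
         below as column scratch space; each permuted row is therefore inserted with MatSetValues()
         in chunks of at most m entries (see the "sum in batches" note on the inner loops).
      */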
PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1714 PetscInt j0,rowlen; 1715 rowlen = ai[i+1] - ai[i]; 1716 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1717 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1718 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1719 } 1720 rowlen = bi[i+1] - bi[i]; 1721 for (j0=j=0; j<rowlen; j0=j) { 1722 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1723 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1724 } 1725 } 1726 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1727 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1728 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1729 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1730 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1731 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1732 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1733 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1734 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1735 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1736 *B = Aperm; 1737 PetscFunctionReturn(0); 1738 } 1739 1740 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1741 { 1742 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1743 PetscErrorCode ierr; 1744 1745 PetscFunctionBegin; 1746 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1747 if (ghosts) *ghosts = aij->garray; 1748 PetscFunctionReturn(0); 1749 } 1750 1751 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1752 { 1753 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1754 Mat A = mat->A,B = mat->B; 1755 PetscErrorCode ierr; 1756 PetscReal isend[5],irecv[5]; 1757 1758 PetscFunctionBegin; 1759 info->block_size = 1.0; 1760 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1761 1762 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1763 isend[3] = info->memory; isend[4] = info->mallocs; 1764 1765 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1766 1767 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1768 isend[3] += info->memory; isend[4] += info->mallocs; 1769 if (flag == MAT_LOCAL) { 1770 info->nz_used = isend[0]; 1771 info->nz_allocated = isend[1]; 1772 info->nz_unneeded = isend[2]; 1773 info->memory = isend[3]; 1774 info->mallocs = isend[4]; 1775 } else if (flag == MAT_GLOBAL_MAX) { 1776 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1777 1778 info->nz_used = irecv[0]; 1779 info->nz_allocated = irecv[1]; 1780 info->nz_unneeded = irecv[2]; 1781 info->memory = irecv[3]; 1782 info->mallocs = irecv[4]; 1783 } else if (flag == MAT_GLOBAL_SUM) { 1784 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1785 1786 info->nz_used = irecv[0]; 1787 info->nz_allocated = irecv[1]; 1788 info->nz_unneeded = irecv[2]; 1789 info->memory = irecv[3]; 1790 info->mallocs = irecv[4]; 1791 } 1792 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1793 info->fill_ratio_needed = 0; 1794 info->factor_mallocs = 0; 1795 PetscFunctionReturn(0); 1796 } 1797 1798 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1799 { 1800 
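  /*
     Most options below are forwarded unchanged to both sequential blocks (the diagonal part a->A and
     the off-diagonal part a->B).  Illustrative caller-side sketch (hypothetical assembled matrix A):

        ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);   forwarded to a->A and a->B
        ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);  handled here via a->donotstash
  */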
Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1801 PetscErrorCode ierr; 1802 1803 PetscFunctionBegin; 1804 switch (op) { 1805 case MAT_NEW_NONZERO_LOCATIONS: 1806 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1807 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1808 case MAT_KEEP_NONZERO_PATTERN: 1809 case MAT_NEW_NONZERO_LOCATION_ERR: 1810 case MAT_USE_INODES: 1811 case MAT_IGNORE_ZERO_ENTRIES: 1812 MatCheckPreallocated(A,1); 1813 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1814 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1815 break; 1816 case MAT_ROW_ORIENTED: 1817 MatCheckPreallocated(A,1); 1818 a->roworiented = flg; 1819 1820 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1821 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1822 break; 1823 case MAT_NEW_DIAGONALS: 1824 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1825 break; 1826 case MAT_IGNORE_OFF_PROC_ENTRIES: 1827 a->donotstash = flg; 1828 break; 1829 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1830 case MAT_SPD: 1831 case MAT_SYMMETRIC: 1832 case MAT_STRUCTURALLY_SYMMETRIC: 1833 case MAT_HERMITIAN: 1834 case MAT_SYMMETRY_ETERNAL: 1835 break; 1836 case MAT_SUBMAT_SINGLEIS: 1837 A->submat_singleis = flg; 1838 break; 1839 case MAT_STRUCTURE_ONLY: 1840 /* The option is handled directly by MatSetOption() */ 1841 break; 1842 default: 1843 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1844 } 1845 PetscFunctionReturn(0); 1846 } 1847 1848 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1849 { 1850 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1851 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1852 PetscErrorCode ierr; 1853 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1854 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1855 PetscInt *cmap,*idx_p; 1856 1857 PetscFunctionBegin; 1858 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1859 mat->getrowactive = PETSC_TRUE; 1860 1861 if (!mat->rowvalues && (idx || v)) { 1862 /* 1863 allocate enough space to hold information from the longest row. 
1864 */ 1865 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1866 PetscInt max = 1,tmp; 1867 for (i=0; i<matin->rmap->n; i++) { 1868 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1869 if (max < tmp) max = tmp; 1870 } 1871 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1872 } 1873 1874 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1875 lrow = row - rstart; 1876 1877 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1878 if (!v) {pvA = 0; pvB = 0;} 1879 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1880 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1881 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1882 nztot = nzA + nzB; 1883 1884 cmap = mat->garray; 1885 if (v || idx) { 1886 if (nztot) { 1887 /* Sort by increasing column numbers, assuming A and B already sorted */ 1888 PetscInt imark = -1; 1889 if (v) { 1890 *v = v_p = mat->rowvalues; 1891 for (i=0; i<nzB; i++) { 1892 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1893 else break; 1894 } 1895 imark = i; 1896 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1897 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1898 } 1899 if (idx) { 1900 *idx = idx_p = mat->rowindices; 1901 if (imark > -1) { 1902 for (i=0; i<imark; i++) { 1903 idx_p[i] = cmap[cworkB[i]]; 1904 } 1905 } else { 1906 for (i=0; i<nzB; i++) { 1907 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1908 else break; 1909 } 1910 imark = i; 1911 } 1912 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1913 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1914 } 1915 } else { 1916 if (idx) *idx = 0; 1917 if (v) *v = 0; 1918 } 1919 } 1920 *nz = nztot; 1921 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1922 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1923 PetscFunctionReturn(0); 1924 } 1925 1926 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1927 { 1928 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1929 1930 PetscFunctionBegin; 1931 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1932 aij->getrowactive = PETSC_FALSE; 1933 PetscFunctionReturn(0); 1934 } 1935 1936 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1937 { 1938 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1939 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1940 PetscErrorCode ierr; 1941 PetscInt i,j,cstart = mat->cmap->rstart; 1942 PetscReal sum = 0.0; 1943 MatScalar *v; 1944 1945 PetscFunctionBegin; 1946 if (aij->size == 1) { 1947 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1948 } else { 1949 if (type == NORM_FROBENIUS) { 1950 v = amat->a; 1951 for (i=0; i<amat->nz; i++) { 1952 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1953 } 1954 v = bmat->a; 1955 for (i=0; i<bmat->nz; i++) { 1956 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1957 } 1958 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1959 *norm = PetscSqrtReal(*norm); 1960 ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr); 1961 } else if (type == NORM_1) { /* max column norm */ 1962 PetscReal *tmp,*tmp2; 1963 PetscInt *jj,*garray = aij->garray; 1964 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1965 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1966 *norm = 0.0; 
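      /* NORM_1 is the maximum column sum: each process accumulates |a_ij| over its local rows into the
         global-length array tmp (diagonal-block columns offset by cstart, off-diagonal columns mapped
         through garray), the per-column sums are added across processes, and the maximum is taken. */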
1967 v = amat->a; jj = amat->j; 1968 for (j=0; j<amat->nz; j++) { 1969 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1970 } 1971 v = bmat->a; jj = bmat->j; 1972 for (j=0; j<bmat->nz; j++) { 1973 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1974 } 1975 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1976 for (j=0; j<mat->cmap->N; j++) { 1977 if (tmp2[j] > *norm) *norm = tmp2[j]; 1978 } 1979 ierr = PetscFree(tmp);CHKERRQ(ierr); 1980 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1981 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1982 } else if (type == NORM_INFINITY) { /* max row norm */ 1983 PetscReal ntemp = 0.0; 1984 for (j=0; j<aij->A->rmap->n; j++) { 1985 v = amat->a + amat->i[j]; 1986 sum = 0.0; 1987 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1988 sum += PetscAbsScalar(*v); v++; 1989 } 1990 v = bmat->a + bmat->i[j]; 1991 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1992 sum += PetscAbsScalar(*v); v++; 1993 } 1994 if (sum > ntemp) ntemp = sum; 1995 } 1996 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1997 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1998 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1999 } 2000 PetscFunctionReturn(0); 2001 } 2002 2003 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2004 { 2005 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 2006 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 2007 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,*B_diag_ilen,*B_diag_i,i,ncol,A_diag_ncol; 2008 PetscErrorCode ierr; 2009 Mat B,A_diag,*B_diag; 2010 MatScalar *array; 2011 2012 PetscFunctionBegin; 2013 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2014 ai = Aloc->i; aj = Aloc->j; 2015 bi = Bloc->i; bj = Bloc->j; 2016 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2017 PetscInt *d_nnz,*g_nnz,*o_nnz; 2018 PetscSFNode *oloc; 2019 PETSC_UNUSED PetscSF sf; 2020 2021 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 2022 /* compute d_nnz for preallocation */ 2023 ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr); 2024 for (i=0; i<ai[ma]; i++) { 2025 d_nnz[aj[i]]++; 2026 } 2027 /* compute local off-diagonal contributions */ 2028 ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr); 2029 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2030 /* map those to global */ 2031 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2032 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2033 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2034 ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr); 2035 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2036 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2037 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2038 2039 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2040 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2041 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 2042 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2043 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2044 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2045 } else { 2046 B = *matout; 2047 ierr = 
MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2048 } 2049 2050 b = (Mat_MPIAIJ*)B->data; 2051 A_diag = a->A; 2052 B_diag = &b->A; 2053 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 2054 A_diag_ncol = A_diag->cmap->N; 2055 B_diag_ilen = sub_B_diag->ilen; 2056 B_diag_i = sub_B_diag->i; 2057 2058 /* Set ilen for diagonal of B */ 2059 for (i=0; i<A_diag_ncol; i++) { 2060 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 2061 } 2062 2063 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 2064 very quickly (=without using MatSetValues), because all writes are local. */ 2065 ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr); 2066 2067 /* copy over the B part */ 2068 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 2069 array = Bloc->a; 2070 row = A->rmap->rstart; 2071 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2072 cols_tmp = cols; 2073 for (i=0; i<mb; i++) { 2074 ncol = bi[i+1]-bi[i]; 2075 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2076 row++; 2077 array += ncol; cols_tmp += ncol; 2078 } 2079 ierr = PetscFree(cols);CHKERRQ(ierr); 2080 2081 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2082 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2083 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 2084 *matout = B; 2085 } else { 2086 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2087 } 2088 PetscFunctionReturn(0); 2089 } 2090 2091 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2092 { 2093 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2094 Mat a = aij->A,b = aij->B; 2095 PetscErrorCode ierr; 2096 PetscInt s1,s2,s3; 2097 2098 PetscFunctionBegin; 2099 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2100 if (rr) { 2101 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2102 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2103 /* Overlap communication with computation. 
*/ 2104 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2105 } 2106 if (ll) { 2107 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2108 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2109 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2110 } 2111 /* scale the diagonal block */ 2112 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2113 2114 if (rr) { 2115 /* Do a scatter end and then right scale the off-diagonal block */ 2116 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2117 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2118 } 2119 PetscFunctionReturn(0); 2120 } 2121 2122 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2123 { 2124 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2125 PetscErrorCode ierr; 2126 2127 PetscFunctionBegin; 2128 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2129 PetscFunctionReturn(0); 2130 } 2131 2132 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2133 { 2134 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2135 Mat a,b,c,d; 2136 PetscBool flg; 2137 PetscErrorCode ierr; 2138 2139 PetscFunctionBegin; 2140 a = matA->A; b = matA->B; 2141 c = matB->A; d = matB->B; 2142 2143 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2144 if (flg) { 2145 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2146 } 2147 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2148 PetscFunctionReturn(0); 2149 } 2150 2151 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2152 { 2153 PetscErrorCode ierr; 2154 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2155 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2156 2157 PetscFunctionBegin; 2158 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2159 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2160 /* because of the column compression in the off-processor part of the matrix a->B, 2161 the number of columns in a->B and b->B may be different, hence we cannot call 2162 the MatCopy() directly on the two parts. If need be, we can provide a more 2163 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2164 then copying the submatrices */ 2165 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2166 } else { 2167 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2168 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2169 } 2170 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2171 PetscFunctionReturn(0); 2172 } 2173 2174 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2175 { 2176 PetscErrorCode ierr; 2177 2178 PetscFunctionBegin; 2179 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2180 PetscFunctionReturn(0); 2181 } 2182 2183 /* 2184 Computes the number of nonzeros per row needed for preallocation when X and Y 2185 have different nonzero structure. 
2186 */ 2187 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2188 { 2189 PetscInt i,j,k,nzx,nzy; 2190 2191 PetscFunctionBegin; 2192 /* Set the number of nonzeros in the new matrix */ 2193 for (i=0; i<m; i++) { 2194 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2195 nzx = xi[i+1] - xi[i]; 2196 nzy = yi[i+1] - yi[i]; 2197 nnz[i] = 0; 2198 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2199 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2200 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2201 nnz[i]++; 2202 } 2203 for (; k<nzy; k++) nnz[i]++; 2204 } 2205 PetscFunctionReturn(0); 2206 } 2207 2208 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2209 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2210 { 2211 PetscErrorCode ierr; 2212 PetscInt m = Y->rmap->N; 2213 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2214 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2215 2216 PetscFunctionBegin; 2217 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2218 PetscFunctionReturn(0); 2219 } 2220 2221 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2222 { 2223 PetscErrorCode ierr; 2224 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2225 PetscBLASInt bnz,one=1; 2226 Mat_SeqAIJ *x,*y; 2227 2228 PetscFunctionBegin; 2229 if (str == SAME_NONZERO_PATTERN) { 2230 PetscScalar alpha = a; 2231 x = (Mat_SeqAIJ*)xx->A->data; 2232 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2233 y = (Mat_SeqAIJ*)yy->A->data; 2234 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2235 x = (Mat_SeqAIJ*)xx->B->data; 2236 y = (Mat_SeqAIJ*)yy->B->data; 2237 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2238 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2239 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2240 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2241 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2242 } else { 2243 Mat B; 2244 PetscInt *nnz_d,*nnz_o; 2245 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2246 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2247 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2248 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2249 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2250 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2251 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2252 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2253 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2254 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2255 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2256 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2257 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2258 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2259 } 2260 PetscFunctionReturn(0); 2261 } 2262 2263 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2264 2265 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2266 { 2267 #if defined(PETSC_USE_COMPLEX) 2268 PetscErrorCode ierr; 2269 Mat_MPIAIJ *aij = 
(Mat_MPIAIJ*)mat->data; 2270 2271 PetscFunctionBegin; 2272 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2273 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2274 #else 2275 PetscFunctionBegin; 2276 #endif 2277 PetscFunctionReturn(0); 2278 } 2279 2280 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2281 { 2282 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2283 PetscErrorCode ierr; 2284 2285 PetscFunctionBegin; 2286 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2287 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2288 PetscFunctionReturn(0); 2289 } 2290 2291 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2292 { 2293 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2294 PetscErrorCode ierr; 2295 2296 PetscFunctionBegin; 2297 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2298 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2299 PetscFunctionReturn(0); 2300 } 2301 2302 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2303 { 2304 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2305 PetscErrorCode ierr; 2306 PetscInt i,*idxb = 0; 2307 PetscScalar *va,*vb; 2308 Vec vtmp; 2309 2310 PetscFunctionBegin; 2311 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2312 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2313 if (idx) { 2314 for (i=0; i<A->rmap->n; i++) { 2315 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2316 } 2317 } 2318 2319 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2320 if (idx) { 2321 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2322 } 2323 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2324 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2325 2326 for (i=0; i<A->rmap->n; i++) { 2327 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2328 va[i] = vb[i]; 2329 if (idx) idx[i] = a->garray[idxb[i]]; 2330 } 2331 } 2332 2333 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2334 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2335 ierr = PetscFree(idxb);CHKERRQ(ierr); 2336 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2337 PetscFunctionReturn(0); 2338 } 2339 2340 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2341 { 2342 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2343 PetscErrorCode ierr; 2344 PetscInt i,*idxb = 0; 2345 PetscScalar *va,*vb; 2346 Vec vtmp; 2347 2348 PetscFunctionBegin; 2349 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2350 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2351 if (idx) { 2352 for (i=0; i<A->cmap->n; i++) { 2353 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2354 } 2355 } 2356 2357 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2358 if (idx) { 2359 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2360 } 2361 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2362 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2363 2364 for (i=0; i<A->rmap->n; i++) { 2365 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2366 va[i] = vb[i]; 2367 if (idx) idx[i] = a->garray[idxb[i]]; 2368 } 2369 } 2370 2371 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2372 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2373 ierr = PetscFree(idxb);CHKERRQ(ierr); 2374 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2375 PetscFunctionReturn(0); 2376 } 2377 2378 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2379 { 2380 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2381 PetscInt n = A->rmap->n; 2382 PetscInt cstart = A->cmap->rstart; 2383 PetscInt *cmap = mat->garray; 2384 PetscInt *diagIdx, *offdiagIdx; 2385 Vec diagV, offdiagV; 2386 PetscScalar *a, *diagA, *offdiagA; 2387 PetscInt r; 2388 PetscErrorCode ierr; 2389 2390 PetscFunctionBegin; 2391 
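  /*
     The row minima of the diagonal block (mat->A) and the off-diagonal block (mat->B) are computed
     separately and merged row by row below; a winning index from mat->A is shifted by cstart, while
     one from mat->B is translated to a global column through the garray map.
  */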
ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2392 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2393 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 2394 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2395 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2396 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2397 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2398 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2399 for (r = 0; r < n; ++r) { 2400 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2401 a[r] = diagA[r]; 2402 idx[r] = cstart + diagIdx[r]; 2403 } else { 2404 a[r] = offdiagA[r]; 2405 idx[r] = cmap[offdiagIdx[r]]; 2406 } 2407 } 2408 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2409 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2410 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2411 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2412 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2413 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2414 PetscFunctionReturn(0); 2415 } 2416 2417 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2418 { 2419 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2420 PetscInt n = A->rmap->n; 2421 PetscInt cstart = A->cmap->rstart; 2422 PetscInt *cmap = mat->garray; 2423 PetscInt *diagIdx, *offdiagIdx; 2424 Vec diagV, offdiagV; 2425 PetscScalar *a, *diagA, *offdiagA; 2426 PetscInt r; 2427 PetscErrorCode ierr; 2428 2429 PetscFunctionBegin; 2430 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2431 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2432 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2433 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2434 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2435 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2436 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2437 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2438 for (r = 0; r < n; ++r) { 2439 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2440 a[r] = diagA[r]; 2441 idx[r] = cstart + diagIdx[r]; 2442 } else { 2443 a[r] = offdiagA[r]; 2444 idx[r] = cmap[offdiagIdx[r]]; 2445 } 2446 } 2447 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2448 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2449 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2450 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2451 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2452 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2453 PetscFunctionReturn(0); 2454 } 2455 2456 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2457 { 2458 PetscErrorCode ierr; 2459 Mat *dummy; 2460 2461 PetscFunctionBegin; 2462 ierr = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2463 *newmat = *dummy; 2464 ierr = PetscFree(dummy);CHKERRQ(ierr); 2465 PetscFunctionReturn(0); 2466 } 2467 2468 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2469 { 2470 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2471 PetscErrorCode ierr; 2472 2473 PetscFunctionBegin; 2474 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2475 A->factorerrortype = a->A->factorerrortype; 2476 PetscFunctionReturn(0); 2477 } 2478 2479 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2480 { 2481 PetscErrorCode ierr; 2482 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2483 2484 PetscFunctionBegin; 2485 if 
(!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2486 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2487 if (x->assembled) { 2488 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2489 } else { 2490 ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr); 2491 } 2492 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2493 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2494 PetscFunctionReturn(0); 2495 } 2496 2497 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2498 { 2499 PetscFunctionBegin; 2500 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2501 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2502 PetscFunctionReturn(0); 2503 } 2504 2505 /*@ 2506 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2507 2508 Collective on Mat 2509 2510 Input Parameters: 2511 + A - the matrix 2512 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2513 2514 Level: advanced 2515 2516 @*/ 2517 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2518 { 2519 PetscErrorCode ierr; 2520 2521 PetscFunctionBegin; 2522 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2523 PetscFunctionReturn(0); 2524 } 2525 2526 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2527 { 2528 PetscErrorCode ierr; 2529 PetscBool sc = PETSC_FALSE,flg; 2530 2531 PetscFunctionBegin; 2532 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2533 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2534 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2535 if (flg) { 2536 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2537 } 2538 ierr = PetscOptionsTail();CHKERRQ(ierr); 2539 PetscFunctionReturn(0); 2540 } 2541 2542 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2543 { 2544 PetscErrorCode ierr; 2545 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2546 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2547 2548 PetscFunctionBegin; 2549 if (!Y->preallocated) { 2550 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2551 } else if (!aij->nz) { 2552 PetscInt nonew = aij->nonew; 2553 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2554 aij->nonew = nonew; 2555 } 2556 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2557 PetscFunctionReturn(0); 2558 } 2559 2560 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2561 { 2562 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2563 PetscErrorCode ierr; 2564 2565 PetscFunctionBegin; 2566 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2567 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2568 if (d) { 2569 PetscInt rstart; 2570 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2571 *d += rstart; 2572 2573 } 2574 PetscFunctionReturn(0); 2575 } 2576 2577 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag) 2578 { 2579 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2580 
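  /*
     Illustrative caller-side sketch for MatMPIAIJSetUseScalableIncreaseOverlap(), documented a few
     routines above (hypothetical matrix A, index set array is[] of length nis, and overlap ov; the
     same switch is available on the command line as -mat_increase_overlap_scalable):

        ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);
        ierr = MatIncreaseOverlap(A,nis,is,ov);CHKERRQ(ierr);    now uses the scalable algorithm
  */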
PetscErrorCode ierr; 2581 2582 PetscFunctionBegin; 2583 ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr); 2584 PetscFunctionReturn(0); 2585 } 2586 2587 /* -------------------------------------------------------------------*/ 2588 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2589 MatGetRow_MPIAIJ, 2590 MatRestoreRow_MPIAIJ, 2591 MatMult_MPIAIJ, 2592 /* 4*/ MatMultAdd_MPIAIJ, 2593 MatMultTranspose_MPIAIJ, 2594 MatMultTransposeAdd_MPIAIJ, 2595 0, 2596 0, 2597 0, 2598 /*10*/ 0, 2599 0, 2600 0, 2601 MatSOR_MPIAIJ, 2602 MatTranspose_MPIAIJ, 2603 /*15*/ MatGetInfo_MPIAIJ, 2604 MatEqual_MPIAIJ, 2605 MatGetDiagonal_MPIAIJ, 2606 MatDiagonalScale_MPIAIJ, 2607 MatNorm_MPIAIJ, 2608 /*20*/ MatAssemblyBegin_MPIAIJ, 2609 MatAssemblyEnd_MPIAIJ, 2610 MatSetOption_MPIAIJ, 2611 MatZeroEntries_MPIAIJ, 2612 /*24*/ MatZeroRows_MPIAIJ, 2613 0, 2614 0, 2615 0, 2616 0, 2617 /*29*/ MatSetUp_MPIAIJ, 2618 0, 2619 0, 2620 MatGetDiagonalBlock_MPIAIJ, 2621 0, 2622 /*34*/ MatDuplicate_MPIAIJ, 2623 0, 2624 0, 2625 0, 2626 0, 2627 /*39*/ MatAXPY_MPIAIJ, 2628 MatCreateSubMatrices_MPIAIJ, 2629 MatIncreaseOverlap_MPIAIJ, 2630 MatGetValues_MPIAIJ, 2631 MatCopy_MPIAIJ, 2632 /*44*/ MatGetRowMax_MPIAIJ, 2633 MatScale_MPIAIJ, 2634 MatShift_MPIAIJ, 2635 MatDiagonalSet_MPIAIJ, 2636 MatZeroRowsColumns_MPIAIJ, 2637 /*49*/ MatSetRandom_MPIAIJ, 2638 0, 2639 0, 2640 0, 2641 0, 2642 /*54*/ MatFDColoringCreate_MPIXAIJ, 2643 0, 2644 MatSetUnfactored_MPIAIJ, 2645 MatPermute_MPIAIJ, 2646 0, 2647 /*59*/ MatCreateSubMatrix_MPIAIJ, 2648 MatDestroy_MPIAIJ, 2649 MatView_MPIAIJ, 2650 0, 2651 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2652 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 2653 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2654 0, 2655 0, 2656 0, 2657 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2658 MatGetRowMinAbs_MPIAIJ, 2659 0, 2660 0, 2661 0, 2662 0, 2663 /*75*/ MatFDColoringApply_AIJ, 2664 MatSetFromOptions_MPIAIJ, 2665 0, 2666 0, 2667 MatFindZeroDiagonals_MPIAIJ, 2668 /*80*/ 0, 2669 0, 2670 0, 2671 /*83*/ MatLoad_MPIAIJ, 2672 MatIsSymmetric_MPIAIJ, 2673 0, 2674 0, 2675 0, 2676 0, 2677 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2678 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2679 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2680 MatPtAP_MPIAIJ_MPIAIJ, 2681 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2682 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2683 0, 2684 0, 2685 0, 2686 0, 2687 /*99*/ 0, 2688 0, 2689 0, 2690 MatConjugate_MPIAIJ, 2691 0, 2692 /*104*/MatSetValuesRow_MPIAIJ, 2693 MatRealPart_MPIAIJ, 2694 MatImaginaryPart_MPIAIJ, 2695 0, 2696 0, 2697 /*109*/0, 2698 0, 2699 MatGetRowMin_MPIAIJ, 2700 0, 2701 MatMissingDiagonal_MPIAIJ, 2702 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2703 0, 2704 MatGetGhosts_MPIAIJ, 2705 0, 2706 0, 2707 /*119*/0, 2708 0, 2709 0, 2710 0, 2711 MatGetMultiProcBlock_MPIAIJ, 2712 /*124*/MatFindNonzeroRows_MPIAIJ, 2713 MatGetColumnNorms_MPIAIJ, 2714 MatInvertBlockDiagonal_MPIAIJ, 2715 MatInvertVariableBlockDiagonal_MPIAIJ, 2716 MatCreateSubMatricesMPI_MPIAIJ, 2717 /*129*/0, 2718 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2719 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2720 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2721 0, 2722 /*134*/0, 2723 0, 2724 MatRARt_MPIAIJ_MPIAIJ, 2725 0, 2726 0, 2727 /*139*/MatSetBlockSizes_MPIAIJ, 2728 0, 2729 0, 2730 MatFDColoringSetUp_MPIXAIJ, 2731 MatFindOffBlockDiagonalEntries_MPIAIJ, 2732 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ 2733 }; 2734 2735 /* ----------------------------------------------------------------------------------------*/ 2736 2737 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2738 { 
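  /*
     MatStoreValues()/MatRetrieveValues() (this routine and the next) simply save and restore the
     numerical values of both sequential blocks.  Illustrative caller-side sketch (hypothetical
     assembled matrix A whose nonzero pattern will not change):

        ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
        ierr = MatStoreValues(A);CHKERRQ(ierr);        save the current values
        ...   modify the values of A, keeping the same pattern   ...
        ierr = MatRetrieveValues(A);CHKERRQ(ierr);     restore the saved values
  */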
2739 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2740 PetscErrorCode ierr; 2741 2742 PetscFunctionBegin; 2743 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2744 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2745 PetscFunctionReturn(0); 2746 } 2747 2748 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2749 { 2750 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2751 PetscErrorCode ierr; 2752 2753 PetscFunctionBegin; 2754 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2755 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2756 PetscFunctionReturn(0); 2757 } 2758 2759 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2760 { 2761 Mat_MPIAIJ *b; 2762 PetscErrorCode ierr; 2763 PetscMPIInt size; 2764 2765 PetscFunctionBegin; 2766 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2767 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2768 b = (Mat_MPIAIJ*)B->data; 2769 2770 #if defined(PETSC_USE_CTABLE) 2771 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2772 #else 2773 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2774 #endif 2775 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2776 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2777 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2778 2779 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2780 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 2781 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2782 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2783 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr); 2784 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2785 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2786 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2787 2788 if (!B->preallocated) { 2789 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2790 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2791 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2792 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2793 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2794 } 2795 2796 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2797 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2798 B->preallocated = PETSC_TRUE; 2799 B->was_assembled = PETSC_FALSE; 2800 B->assembled = PETSC_FALSE; 2801 PetscFunctionReturn(0); 2802 } 2803 2804 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2805 { 2806 Mat_MPIAIJ *b; 2807 PetscErrorCode ierr; 2808 2809 PetscFunctionBegin; 2810 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2811 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2812 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2813 b = (Mat_MPIAIJ*)B->data; 2814 2815 #if defined(PETSC_USE_CTABLE) 2816 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2817 #else 2818 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2819 #endif 2820 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2821 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2822 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2823 2824 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2825 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2826 B->preallocated = PETSC_TRUE; 2827 B->was_assembled = PETSC_FALSE; 2828 B->assembled = PETSC_FALSE; 2829 PetscFunctionReturn(0); 2830 } 2831 2832 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2833 { 2834 Mat mat; 
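  /*
     Duplicates the parallel layout, the scatter context (lvec/Mvctx) and both sequential blocks of
     matin.  Illustrative caller-side sketch (hypothetical assembled matrix A):

        Mat B;
        ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);    structure and values
        (or pass MAT_DO_NOT_COPY_VALUES to copy only the nonzero structure)
  */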
2835 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2836 PetscErrorCode ierr; 2837 2838 PetscFunctionBegin; 2839 *newmat = 0; 2840 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2841 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2842 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2843 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2844 a = (Mat_MPIAIJ*)mat->data; 2845 2846 mat->factortype = matin->factortype; 2847 mat->assembled = PETSC_TRUE; 2848 mat->insertmode = NOT_SET_VALUES; 2849 mat->preallocated = PETSC_TRUE; 2850 2851 a->size = oldmat->size; 2852 a->rank = oldmat->rank; 2853 a->donotstash = oldmat->donotstash; 2854 a->roworiented = oldmat->roworiented; 2855 a->rowindices = 0; 2856 a->rowvalues = 0; 2857 a->getrowactive = PETSC_FALSE; 2858 2859 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2860 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2861 2862 if (oldmat->colmap) { 2863 #if defined(PETSC_USE_CTABLE) 2864 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2865 #else 2866 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2867 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2868 ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr); 2869 #endif 2870 } else a->colmap = 0; 2871 if (oldmat->garray) { 2872 PetscInt len; 2873 len = oldmat->B->cmap->n; 2874 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2875 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2876 if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); } 2877 } else a->garray = 0; 2878 2879 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2880 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2881 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2882 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2883 2884 if (oldmat->Mvctx_mpi1) { 2885 ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr); 2886 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr); 2887 } 2888 2889 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2890 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2891 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2892 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2893 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2894 *newmat = mat; 2895 PetscFunctionReturn(0); 2896 } 2897 2898 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2899 { 2900 PetscBool isbinary, ishdf5; 2901 PetscErrorCode ierr; 2902 2903 PetscFunctionBegin; 2904 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 2905 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 2906 /* force binary viewer to load .info file if it has not yet done so */ 2907 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2908 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 2909 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr); 2910 if (isbinary) { 2911 ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr); 2912 } else if (ishdf5) { 2913 #if defined(PETSC_HAVE_HDF5) 2914 ierr = 
MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr); 2915 #else 2916 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 2917 #endif 2918 } else { 2919 SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 2920 } 2921 PetscFunctionReturn(0); 2922 } 2923 2924 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat newMat, PetscViewer viewer) 2925 { 2926 PetscScalar *vals,*svals; 2927 MPI_Comm comm; 2928 PetscErrorCode ierr; 2929 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2930 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2931 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2932 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2933 PetscInt cend,cstart,n,*rowners; 2934 int fd; 2935 PetscInt bs = newMat->rmap->bs; 2936 2937 PetscFunctionBegin; 2938 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2939 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2940 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2941 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2942 if (!rank) { 2943 ierr = PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);CHKERRQ(ierr); 2944 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2945 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ"); 2946 } 2947 2948 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr); 2949 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2950 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2951 if (bs < 0) bs = 1; 2952 2953 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2954 M = header[1]; N = header[2]; 2955 2956 /* If global sizes are set, check if they are consistent with that given in the file */ 2957 if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M); 2958 if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N); 2959 2960 /* determine ownership of all (block) rows */ 2961 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 2962 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 2963 else m = newMat->rmap->n; /* Set by user */ 2964 2965 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 2966 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2967 2968 /* First process needs enough room for process with most rows */ 2969 if (!rank) { 2970 mmax = rowners[1]; 2971 for (i=2; i<=size; i++) { 2972 mmax = PetscMax(mmax, rowners[i]); 2973 } 2974 } else mmax = -1; /* unused, but compilers complain */ 2975 2976 rowners[0] = 0; 2977 for (i=2; i<=size; i++) { 2978 rowners[i] += rowners[i-1]; 2979 } 2980 rstart = rowners[rank]; 2981 rend = rowners[rank+1]; 2982 2983 /* distribute row lengths to all processors */ 2984 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 2985 if (!rank) { 2986 
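    /* Process 0 reads its own row lengths directly, then reads each remaining process's block of row
       lengths from the file, tallies that process's nonzero count in procsnz[], and ships the block to
       its owner with MPIULong_Send(); every other process simply receives its row lengths below. */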
ierr = PetscBinaryRead(fd,ourlens,m,NULL,PETSC_INT);CHKERRQ(ierr); 2987 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 2988 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 2989 for (j=0; j<m; j++) { 2990 procsnz[0] += ourlens[j]; 2991 } 2992 for (i=1; i<size; i++) { 2993 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],NULL,PETSC_INT);CHKERRQ(ierr); 2994 /* calculate the number of nonzeros on each processor */ 2995 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2996 procsnz[i] += rowlengths[j]; 2997 } 2998 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2999 } 3000 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3001 } else { 3002 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3003 } 3004 3005 if (!rank) { 3006 /* determine max buffer needed and allocate it */ 3007 maxnz = 0; 3008 for (i=0; i<size; i++) { 3009 maxnz = PetscMax(maxnz,procsnz[i]); 3010 } 3011 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 3012 3013 /* read in my part of the matrix column indices */ 3014 nz = procsnz[0]; 3015 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3016 ierr = PetscBinaryRead(fd,mycols,nz,NULL,PETSC_INT);CHKERRQ(ierr); 3017 3018 /* read in every one elses and ship off */ 3019 for (i=1; i<size; i++) { 3020 nz = procsnz[i]; 3021 ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr); 3022 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3023 } 3024 ierr = PetscFree(cols);CHKERRQ(ierr); 3025 } else { 3026 /* determine buffer space needed for message */ 3027 nz = 0; 3028 for (i=0; i<m; i++) { 3029 nz += ourlens[i]; 3030 } 3031 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3032 3033 /* receive message of column indices*/ 3034 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3035 } 3036 3037 /* determine column ownership if matrix is not square */ 3038 if (N != M) { 3039 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3040 else n = newMat->cmap->n; 3041 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3042 cstart = cend - n; 3043 } else { 3044 cstart = rstart; 3045 cend = rend; 3046 n = cend - cstart; 3047 } 3048 3049 /* loop over local rows, determining number of off diagonal entries */ 3050 ierr = PetscArrayzero(offlens,m);CHKERRQ(ierr); 3051 jj = 0; 3052 for (i=0; i<m; i++) { 3053 for (j=0; j<ourlens[i]; j++) { 3054 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3055 jj++; 3056 } 3057 } 3058 3059 for (i=0; i<m; i++) { 3060 ourlens[i] -= offlens[i]; 3061 } 3062 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3063 3064 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3065 3066 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3067 3068 for (i=0; i<m; i++) { 3069 ourlens[i] += offlens[i]; 3070 } 3071 3072 if (!rank) { 3073 ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr); 3074 3075 /* read in my part of the matrix numerical values */ 3076 nz = procsnz[0]; 3077 ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr); 3078 3079 /* insert into matrix */ 3080 jj = rstart; 3081 smycols = mycols; 3082 svals = vals; 3083 for (i=0; i<m; i++) { 3084 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3085 smycols += ourlens[i]; 3086 svals += ourlens[i]; 3087 jj++; 3088 } 3089 3090 /* read in other processors and ship out */ 3091 for (i=1; i<size; i++) { 3092 nz = procsnz[i]; 3093 ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr); 3094 ierr = 
MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3095 } 3096 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3097 } else { 3098 /* receive numeric values */ 3099 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 3100 3101 /* receive message of values*/ 3102 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3103 3104 /* insert into matrix */ 3105 jj = rstart; 3106 smycols = mycols; 3107 svals = vals; 3108 for (i=0; i<m; i++) { 3109 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3110 smycols += ourlens[i]; 3111 svals += ourlens[i]; 3112 jj++; 3113 } 3114 } 3115 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3116 ierr = PetscFree(vals);CHKERRQ(ierr); 3117 ierr = PetscFree(mycols);CHKERRQ(ierr); 3118 ierr = PetscFree(rowners);CHKERRQ(ierr); 3119 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3120 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3121 PetscFunctionReturn(0); 3122 } 3123 3124 /* Not scalable because of ISAllGather() unless getting all columns. */ 3125 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3126 { 3127 PetscErrorCode ierr; 3128 IS iscol_local; 3129 PetscBool isstride; 3130 PetscMPIInt lisstride=0,gisstride; 3131 3132 PetscFunctionBegin; 3133 /* check if we are grabbing all columns*/ 3134 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3135 3136 if (isstride) { 3137 PetscInt start,len,mstart,mlen; 3138 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3139 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3140 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3141 if (mstart == start && mlen-mstart == len) lisstride = 1; 3142 } 3143 3144 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3145 if (gisstride) { 3146 PetscInt N; 3147 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3148 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr); 3149 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3150 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3151 } else { 3152 PetscInt cbs; 3153 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3154 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3155 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3156 } 3157 3158 *isseq = iscol_local; 3159 PetscFunctionReturn(0); 3160 } 3161 3162 /* 3163 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3164 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3165 3166 Input Parameters: 3167 mat - matrix 3168 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3169 i.e., mat->rstart <= isrow[i] < mat->rend 3170 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3171 i.e., mat->cstart <= iscol[i] < mat->cend 3172 Output Parameter: 3173 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3174 iscol_o - sequential column index set for retrieving mat->B 3175 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3176 */ 3177 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3178 { 3179 PetscErrorCode ierr; 3180 Vec x,cmap; 3181 const PetscInt *is_idx; 3182 
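  /* Overall strategy (steps (1)-(3) below): mark the locally selected columns of iscol
     in a full-length vector x and record their submatrix column numbers in cmap, scatter
     both through a->Mvctx exactly as MatMult_MPIAIJ() does, and then harvest the
     off-process entries from lvec/lcmap to build iscol_o and garray. */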
PetscScalar *xarray,*cmaparray; 3183 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3184 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3185 Mat B=a->B; 3186 Vec lvec=a->lvec,lcmap; 3187 PetscInt i,cstart,cend,Bn=B->cmap->N; 3188 MPI_Comm comm; 3189 VecScatter Mvctx=a->Mvctx; 3190 3191 PetscFunctionBegin; 3192 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3193 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3194 3195 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */ 3196 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3197 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3198 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3199 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3200 3201 /* Get start indices */ 3202 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3203 isstart -= ncols; 3204 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3205 3206 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3207 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3208 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3209 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3210 for (i=0; i<ncols; i++) { 3211 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3212 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3213 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3214 } 3215 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3216 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3217 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3218 3219 /* Get iscol_d */ 3220 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3221 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3222 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3223 3224 /* Get isrow_d */ 3225 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3226 rstart = mat->rmap->rstart; 3227 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3228 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3229 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3230 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3231 3232 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3233 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3234 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3235 3236 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3237 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3238 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3239 3240 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3241 3242 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3243 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3244 3245 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3246 /* off-process column indices */ 3247 count = 0; 3248 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3249 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3250 3251 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3252 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3253 for (i=0; i<Bn; i++) { 3254 if (PetscRealPart(xarray[i]) > -1.0) { 3255 idx[count] = i; /* local column index in off-diagonal part B */ 3256 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3257 count++; 3258 } 3259 } 3260 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3261 ierr = 
VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3262 3263 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3264 /* cannot ensure iscol_o has same blocksize as iscol! */ 3265 3266 ierr = PetscFree(idx);CHKERRQ(ierr); 3267 *garray = cmap1; 3268 3269 ierr = VecDestroy(&x);CHKERRQ(ierr); 3270 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3271 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3272 PetscFunctionReturn(0); 3273 } 3274 3275 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3276 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3277 { 3278 PetscErrorCode ierr; 3279 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3280 Mat M = NULL; 3281 MPI_Comm comm; 3282 IS iscol_d,isrow_d,iscol_o; 3283 Mat Asub = NULL,Bsub = NULL; 3284 PetscInt n; 3285 3286 PetscFunctionBegin; 3287 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3288 3289 if (call == MAT_REUSE_MATRIX) { 3290 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3291 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3292 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3293 3294 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3295 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3296 3297 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3298 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3299 3300 /* Update diagonal and off-diagonal portions of submat */ 3301 asub = (Mat_MPIAIJ*)(*submat)->data; 3302 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3303 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3304 if (n) { 3305 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3306 } 3307 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3308 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3309 3310 } else { /* call == MAT_INITIAL_MATRIX) */ 3311 const PetscInt *garray; 3312 PetscInt BsubN; 3313 3314 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3315 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3316 3317 /* Create local submatrices Asub and Bsub */ 3318 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3319 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3320 3321 /* Create submatrix M */ 3322 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3323 3324 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3325 asub = (Mat_MPIAIJ*)M->data; 3326 3327 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3328 n = asub->B->cmap->N; 3329 if (BsubN > n) { 3330 /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */ 3331 const PetscInt *idx; 3332 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3333 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3334 3335 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3336 j = 0; 3337 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3338 for (i=0; i<n; i++) { 3339 if (j >= BsubN) break; 3340 while (subgarray[i] > garray[j]) j++; 3341 3342 if (subgarray[i] == garray[j]) { 3343 idx_new[i] = idx[j++]; 3344 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3345 } 3346 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3347 3348 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3349 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3350 3351 } else if (BsubN < n) { 3352 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3353 } 3354 3355 ierr = PetscFree(garray);CHKERRQ(ierr); 3356 *submat = M; 3357 3358 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3359 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3360 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3361 3362 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3363 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3364 3365 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3366 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3367 } 3368 PetscFunctionReturn(0); 3369 } 3370 3371 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3372 { 3373 PetscErrorCode ierr; 3374 IS iscol_local=NULL,isrow_d; 3375 PetscInt csize; 3376 PetscInt n,i,j,start,end; 3377 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3378 MPI_Comm comm; 3379 3380 PetscFunctionBegin; 3381 /* If isrow has same processor distribution as mat, 3382 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3383 if (call == MAT_REUSE_MATRIX) { 3384 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3385 if (isrow_d) { 3386 sameRowDist = PETSC_TRUE; 3387 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3388 } else { 3389 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3390 if (iscol_local) { 3391 sameRowDist = PETSC_TRUE; 3392 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3393 } 3394 } 3395 } else { 3396 /* Check if isrow has same processor distribution as mat */ 3397 sameDist[0] 
= PETSC_FALSE; 3398 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3399 if (!n) { 3400 sameDist[0] = PETSC_TRUE; 3401 } else { 3402 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3403 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3404 if (i >= start && j < end) { 3405 sameDist[0] = PETSC_TRUE; 3406 } 3407 } 3408 3409 /* Check if iscol has same processor distribution as mat */ 3410 sameDist[1] = PETSC_FALSE; 3411 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3412 if (!n) { 3413 sameDist[1] = PETSC_TRUE; 3414 } else { 3415 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3416 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3417 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3418 } 3419 3420 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3421 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3422 sameRowDist = tsameDist[0]; 3423 } 3424 3425 if (sameRowDist) { 3426 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3427 /* isrow and iscol have same processor distribution as mat */ 3428 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3429 PetscFunctionReturn(0); 3430 } else { /* sameRowDist */ 3431 /* isrow has same processor distribution as mat */ 3432 if (call == MAT_INITIAL_MATRIX) { 3433 PetscBool sorted; 3434 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3435 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3436 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3437 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3438 3439 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3440 if (sorted) { 3441 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3442 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3443 PetscFunctionReturn(0); 3444 } 3445 } else { /* call == MAT_REUSE_MATRIX */ 3446 IS iscol_sub; 3447 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3448 if (iscol_sub) { 3449 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3450 PetscFunctionReturn(0); 3451 } 3452 } 3453 } 3454 } 3455 3456 /* General case: iscol -> iscol_local which has global size of iscol */ 3457 if (call == MAT_REUSE_MATRIX) { 3458 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3459 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3460 } else { 3461 if (!iscol_local) { 3462 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3463 } 3464 } 3465 3466 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3467 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3468 3469 if (call == MAT_INITIAL_MATRIX) { 3470 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3471 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3472 } 3473 PetscFunctionReturn(0); 3474 } 3475 3476 /*@C 3477 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3478 and "off-diagonal" part of the matrix in CSR format. 3479 3480 Collective 3481 3482 Input Parameters: 3483 + comm - MPI communicator 3484 . 
A - "diagonal" portion of matrix 3485 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3486 - garray - global index of B columns 3487 3488 Output Parameter: 3489 . mat - the matrix, with input A as its local diagonal matrix 3490 Level: advanced 3491 3492 Notes: 3493 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3494 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3495 3496 .seealso: MatCreateMPIAIJWithSplitArrays() 3497 @*/ 3498 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3499 { 3500 PetscErrorCode ierr; 3501 Mat_MPIAIJ *maij; 3502 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3503 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3504 PetscScalar *oa=b->a; 3505 Mat Bnew; 3506 PetscInt m,n,N; 3507 3508 PetscFunctionBegin; 3509 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3510 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3511 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3512 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3513 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3514 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3515 3516 /* Get global columns of mat */ 3517 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3518 3519 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3520 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3521 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3522 maij = (Mat_MPIAIJ*)(*mat)->data; 3523 3524 (*mat)->preallocated = PETSC_TRUE; 3525 3526 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3527 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3528 3529 /* Set A as diagonal portion of *mat */ 3530 maij->A = A; 3531 3532 nz = oi[m]; 3533 for (i=0; i<nz; i++) { 3534 col = oj[i]; 3535 oj[i] = garray[col]; 3536 } 3537 3538 /* Set Bnew as off-diagonal portion of *mat */ 3539 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3540 bnew = (Mat_SeqAIJ*)Bnew->data; 3541 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3542 maij->B = Bnew; 3543 3544 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3545 3546 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3547 b->free_a = PETSC_FALSE; 3548 b->free_ij = PETSC_FALSE; 3549 ierr = MatDestroy(&B);CHKERRQ(ierr); 3550 3551 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3552 bnew->free_a = PETSC_TRUE; 3553 bnew->free_ij = PETSC_TRUE; 3554 3555 /* condense columns of maij->B */ 3556 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3557 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3558 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3559 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3560 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3561 PetscFunctionReturn(0); 3562 } 3563 3564 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3565 
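/*
   Illustrative sketch (not part of the PETSc API): one way a caller can reach the
   MatCreateSubMatrix_MPIAIJ_SameRowColDist() path above through the public
   MatCreateSubMatrix() interface, namely by building row and column index sets that
   match the matrix's own ownership ranges. The helper name and the choice to keep
   every locally owned row and column are assumptions made only for this example.
*/
PETSC_UNUSED static PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameDistExample(Mat A,Mat *sub)
{
  PetscErrorCode ierr;
  PetscInt       rstart,rend,cstart,cend;
  IS             isrow,iscol;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);         /* locally owned rows */
  ierr = MatGetOwnershipRangeColumn(A,&cstart,&cend);CHKERRQ(ierr);   /* locally owned columns */
  ierr = ISCreateStride(PetscObjectComm((PetscObject)A),rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
  ierr = ISCreateStride(PetscObjectComm((PetscObject)A),cend-cstart,cstart,1,&iscol);CHKERRQ(ierr);
  /* both index sets follow A's row/column layout, so MatCreateSubMatrix_MPIAIJ() above
     selects the SameRowColDist branch rather than the nonscalable ISAllGather() path */
  ierr = MatCreateSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,sub);CHKERRQ(ierr);
  ierr = ISDestroy(&isrow);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
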
3566 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3567 { 3568 PetscErrorCode ierr; 3569 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3570 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3571 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3572 Mat M,Msub,B=a->B; 3573 MatScalar *aa; 3574 Mat_SeqAIJ *aij; 3575 PetscInt *garray = a->garray,*colsub,Ncols; 3576 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3577 IS iscol_sub,iscmap; 3578 const PetscInt *is_idx,*cmap; 3579 PetscBool allcolumns=PETSC_FALSE; 3580 MPI_Comm comm; 3581 3582 PetscFunctionBegin; 3583 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3584 3585 if (call == MAT_REUSE_MATRIX) { 3586 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3587 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3588 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3589 3590 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3591 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3592 3593 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3594 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3595 3596 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3597 3598 } else { /* call == MAT_INITIAL_MATRIX) */ 3599 PetscBool flg; 3600 3601 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3602 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3603 3604 /* (1) iscol -> nonscalable iscol_local */ 3605 /* Check for special case: each processor gets entire matrix columns */ 3606 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3607 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3608 if (allcolumns) { 3609 iscol_sub = iscol_local; 3610 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3611 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3612 3613 } else { 3614 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3615 PetscInt *idx,*cmap1,k; 3616 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3617 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3618 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3619 count = 0; 3620 k = 0; 3621 for (i=0; i<Ncols; i++) { 3622 j = is_idx[i]; 3623 if (j >= cstart && j < cend) { 3624 /* diagonal part of mat */ 3625 idx[count] = j; 3626 cmap1[count++] = i; /* column index in submat */ 3627 } else if (Bn) { 3628 /* off-diagonal part of mat */ 3629 if (j == garray[k]) { 3630 idx[count] = j; 3631 cmap1[count++] = i; /* column index in submat */ 3632 } else if (j > garray[k]) { 3633 while (j > garray[k] && k < Bn-1) k++; 3634 if (j == garray[k]) { 3635 idx[count] = j; 3636 cmap1[count++] = i; /* column index in submat */ 3637 } 3638 } 3639 } 3640 } 3641 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3642 3643 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3644 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3645 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3646 3647 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3648 } 3649 3650 /* (3) Create sequential Msub */ 3651 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3652 } 3653 3654 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3655 aij = (Mat_SeqAIJ*)(Msub)->data; 3656 ii = aij->i; 3657 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3658 3659 /* 3660 m - number of local rows 3661 Ncols - number of columns (same on all processors) 3662 rstart - first row in new global matrix generated 3663 */ 3664 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3665 3666 if (call == MAT_INITIAL_MATRIX) { 3667 /* (4) Create parallel newmat */ 3668 PetscMPIInt rank,size; 3669 PetscInt csize; 3670 3671 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3672 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3673 3674 /* 3675 Determine the number of non-zeros in the diagonal and off-diagonal 3676 portions of the matrix in order to do correct preallocation 3677 */ 3678 3679 /* first get start and end of "diagonal" columns */ 3680 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3681 if (csize == PETSC_DECIDE) { 3682 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3683 if (mglobal == Ncols) { /* square matrix */ 3684 nlocal = m; 3685 } else { 3686 nlocal = Ncols/size + ((Ncols % size) > rank); 3687 } 3688 } else { 3689 nlocal = csize; 3690 } 3691 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3692 rstart = rend - nlocal; 3693 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3694 3695 /* next, compute all the lengths */ 3696 jj = aij->j; 3697 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3698 olens = dlens + m; 3699 for (i=0; i<m; i++) { 3700 jend = ii[i+1] - ii[i]; 3701 olen = 0; 3702 dlen = 0; 3703 for (j=0; j<jend; j++) { 3704 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3705 else dlen++; 3706 jj++; 3707 } 3708 olens[i] = olen; 3709 dlens[i] = dlen; 3710 } 3711 3712 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3713 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3714 3715 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3716 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 
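    /* M (the eventual *newmat) gets m local rows and nlocal "diagonal" columns; the
       dlens/olens counts computed above give it an exact per-row preallocation below,
       before the entries of Msub are copied in at step (5) */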
3717 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3718 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3719 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3720 ierr = PetscFree(dlens);CHKERRQ(ierr); 3721 3722 } else { /* call == MAT_REUSE_MATRIX */ 3723 M = *newmat; 3724 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3725 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3726 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3727 /* 3728 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3729 rather than the slower MatSetValues(). 3730 */ 3731 M->was_assembled = PETSC_TRUE; 3732 M->assembled = PETSC_FALSE; 3733 } 3734 3735 /* (5) Set values of Msub to *newmat */ 3736 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3737 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3738 3739 jj = aij->j; 3740 aa = aij->a; 3741 for (i=0; i<m; i++) { 3742 row = rstart + i; 3743 nz = ii[i+1] - ii[i]; 3744 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3745 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3746 jj += nz; aa += nz; 3747 } 3748 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3749 3750 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3751 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3752 3753 ierr = PetscFree(colsub);CHKERRQ(ierr); 3754 3755 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3756 if (call == MAT_INITIAL_MATRIX) { 3757 *newmat = M; 3758 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3759 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3760 3761 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3762 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3763 3764 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3765 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3766 3767 if (iscol_local) { 3768 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3769 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3770 } 3771 } 3772 PetscFunctionReturn(0); 3773 } 3774 3775 /* 3776 Not great since it makes two copies of the submatrix, first an SeqAIJ 3777 in local and then by concatenating the local matrices the end result. 3778 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3779 3780 Note: This requires a sequential iscol with all indices. 
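    Because every process therefore holds a sequential iscol listing all selected
    columns, the memory used by this path grows with the global size of iscol,
    which is why it is kept only as the general (nonscalable) fallback.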
3781 */ 3782 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3783 { 3784 PetscErrorCode ierr; 3785 PetscMPIInt rank,size; 3786 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3787 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3788 Mat M,Mreuse; 3789 MatScalar *aa,*vwork; 3790 MPI_Comm comm; 3791 Mat_SeqAIJ *aij; 3792 PetscBool colflag,allcolumns=PETSC_FALSE; 3793 3794 PetscFunctionBegin; 3795 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3796 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3797 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3798 3799 /* Check for special case: each processor gets entire matrix columns */ 3800 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3801 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3802 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3803 3804 if (call == MAT_REUSE_MATRIX) { 3805 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3806 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3807 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3808 } else { 3809 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3810 } 3811 3812 /* 3813 m - number of local rows 3814 n - number of columns (same on all processors) 3815 rstart - first row in new global matrix generated 3816 */ 3817 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3818 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3819 if (call == MAT_INITIAL_MATRIX) { 3820 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3821 ii = aij->i; 3822 jj = aij->j; 3823 3824 /* 3825 Determine the number of non-zeros in the diagonal and off-diagonal 3826 portions of the matrix in order to do correct preallocation 3827 */ 3828 3829 /* first get start and end of "diagonal" columns */ 3830 if (csize == PETSC_DECIDE) { 3831 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3832 if (mglobal == n) { /* square matrix */ 3833 nlocal = m; 3834 } else { 3835 nlocal = n/size + ((n % size) > rank); 3836 } 3837 } else { 3838 nlocal = csize; 3839 } 3840 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3841 rstart = rend - nlocal; 3842 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3843 3844 /* next, compute all the lengths */ 3845 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3846 olens = dlens + m; 3847 for (i=0; i<m; i++) { 3848 jend = ii[i+1] - ii[i]; 3849 olen = 0; 3850 dlen = 0; 3851 for (j=0; j<jend; j++) { 3852 if (*jj < rstart || *jj >= rend) olen++; 3853 else dlen++; 3854 jj++; 3855 } 3856 olens[i] = olen; 3857 dlens[i] = dlen; 3858 } 3859 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3860 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3861 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3862 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3863 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3864 ierr = PetscFree(dlens);CHKERRQ(ierr); 3865 } else { 3866 PetscInt ml,nl; 3867 3868 M = *newmat; 3869 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3870 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3871 ierr = 
MatZeroEntries(M);CHKERRQ(ierr); 3872 /* 3873 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3874 rather than the slower MatSetValues(). 3875 */ 3876 M->was_assembled = PETSC_TRUE; 3877 M->assembled = PETSC_FALSE; 3878 } 3879 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3880 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3881 ii = aij->i; 3882 jj = aij->j; 3883 aa = aij->a; 3884 for (i=0; i<m; i++) { 3885 row = rstart + i; 3886 nz = ii[i+1] - ii[i]; 3887 cwork = jj; jj += nz; 3888 vwork = aa; aa += nz; 3889 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3890 } 3891 3892 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3893 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3894 *newmat = M; 3895 3896 /* save submatrix used in processor for next request */ 3897 if (call == MAT_INITIAL_MATRIX) { 3898 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3899 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3900 } 3901 PetscFunctionReturn(0); 3902 } 3903 3904 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3905 { 3906 PetscInt m,cstart, cend,j,nnz,i,d; 3907 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3908 const PetscInt *JJ; 3909 PetscScalar *values; 3910 PetscErrorCode ierr; 3911 PetscBool nooffprocentries; 3912 3913 PetscFunctionBegin; 3914 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3915 3916 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3917 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3918 m = B->rmap->n; 3919 cstart = B->cmap->rstart; 3920 cend = B->cmap->rend; 3921 rstart = B->rmap->rstart; 3922 3923 ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3924 3925 #if defined(PETSC_USE_DEBUG) 3926 for (i=0; i<m; i++) { 3927 nnz = Ii[i+1]- Ii[i]; 3928 JJ = J + Ii[i]; 3929 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3930 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]); 3931 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3932 } 3933 #endif 3934 3935 for (i=0; i<m; i++) { 3936 nnz = Ii[i+1]- Ii[i]; 3937 JJ = J + Ii[i]; 3938 nnz_max = PetscMax(nnz_max,nnz); 3939 d = 0; 3940 for (j=0; j<nnz; j++) { 3941 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3942 } 3943 d_nnz[i] = d; 3944 o_nnz[i] = nnz - d; 3945 } 3946 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3947 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3948 3949 if (v) values = (PetscScalar*)v; 3950 else { 3951 ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr); 3952 } 3953 3954 for (i=0; i<m; i++) { 3955 ii = i + rstart; 3956 nnz = Ii[i+1]- Ii[i]; 3957 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? 
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3958 } 3959 nooffprocentries = B->nooffprocentries; 3960 B->nooffprocentries = PETSC_TRUE; 3961 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3962 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3963 B->nooffprocentries = nooffprocentries; 3964 3965 if (!v) { 3966 ierr = PetscFree(values);CHKERRQ(ierr); 3967 } 3968 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3969 PetscFunctionReturn(0); 3970 } 3971 3972 /*@ 3973 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3974 (the default parallel PETSc format). 3975 3976 Collective 3977 3978 Input Parameters: 3979 + B - the matrix 3980 . i - the indices into j for the start of each local row (starts with zero) 3981 . j - the column indices for each local row (starts with zero) 3982 - v - optional values in the matrix 3983 3984 Level: developer 3985 3986 Notes: 3987 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3988 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3989 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3990 3991 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3992 3993 The format which is used for the sparse matrix input, is equivalent to a 3994 row-major ordering.. i.e for the following matrix, the input data expected is 3995 as shown 3996 3997 $ 1 0 0 3998 $ 2 0 3 P0 3999 $ ------- 4000 $ 4 5 6 P1 4001 $ 4002 $ Process0 [P0]: rows_owned=[0,1] 4003 $ i = {0,1,3} [size = nrow+1 = 2+1] 4004 $ j = {0,0,2} [size = 3] 4005 $ v = {1,2,3} [size = 3] 4006 $ 4007 $ Process1 [P1]: rows_owned=[2] 4008 $ i = {0,3} [size = nrow+1 = 1+1] 4009 $ j = {0,1,2} [size = 3] 4010 $ v = {4,5,6} [size = 3] 4011 4012 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 4013 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 4014 @*/ 4015 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 4016 { 4017 PetscErrorCode ierr; 4018 4019 PetscFunctionBegin; 4020 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 4021 PetscFunctionReturn(0); 4022 } 4023 4024 /*@C 4025 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 4026 (the default parallel PETSc format). For good matrix assembly performance 4027 the user should preallocate the matrix storage by setting the parameters 4028 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4029 performance can be increased by more than a factor of 50. 4030 4031 Collective 4032 4033 Input Parameters: 4034 + B - the matrix 4035 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4036 (same value is used for all local rows) 4037 . d_nnz - array containing the number of nonzeros in the various rows of the 4038 DIAGONAL portion of the local submatrix (possibly different for each row) 4039 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 4040 The size of this array is equal to the number of local rows, i.e 'm'. 4041 For matrices that will be factored, you must leave room for (and set) 4042 the diagonal entry even if it is zero. 4043 . 
o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4044 submatrix (same value is used for all local rows). 4045 - o_nnz - array containing the number of nonzeros in the various rows of the 4046 OFF-DIAGONAL portion of the local submatrix (possibly different for 4047 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 4048 structure. The size of this array is equal to the number 4049 of local rows, i.e 'm'. 4050 4051 If the *_nnz parameter is given then the *_nz parameter is ignored 4052 4053 The AIJ format (also called the Yale sparse matrix format or 4054 compressed row storage (CSR)), is fully compatible with standard Fortran 77 4055 storage. The stored row and column indices begin with zero. 4056 See Users-Manual: ch_mat for details. 4057 4058 The parallel matrix is partitioned such that the first m0 rows belong to 4059 process 0, the next m1 rows belong to process 1, the next m2 rows belong 4060 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 4061 4062 The DIAGONAL portion of the local submatrix of a processor can be defined 4063 as the submatrix which is obtained by extraction the part corresponding to 4064 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 4065 first row that belongs to the processor, r2 is the last row belonging to 4066 the this processor, and c1-c2 is range of indices of the local part of a 4067 vector suitable for applying the matrix to. This is an mxn matrix. In the 4068 common case of a square matrix, the row and column ranges are the same and 4069 the DIAGONAL part is also square. The remaining portion of the local 4070 submatrix (mxN) constitute the OFF-DIAGONAL portion. 4071 4072 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4073 4074 You can call MatGetInfo() to get information on how effective the preallocation was; 4075 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 4076 You can also run with the option -info and look for messages with the string 4077 malloc in them to see if additional memory allocation was needed. 4078 4079 Example usage: 4080 4081 Consider the following 8x8 matrix with 34 non-zero values, that is 4082 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4083 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4084 as follows: 4085 4086 .vb 4087 1 2 0 | 0 3 0 | 0 4 4088 Proc0 0 5 6 | 7 0 0 | 8 0 4089 9 0 10 | 11 0 0 | 12 0 4090 ------------------------------------- 4091 13 0 14 | 15 16 17 | 0 0 4092 Proc1 0 18 0 | 19 20 21 | 0 0 4093 0 0 0 | 22 23 0 | 24 0 4094 ------------------------------------- 4095 Proc2 25 26 27 | 0 0 28 | 29 0 4096 30 0 0 | 31 32 33 | 0 34 4097 .ve 4098 4099 This can be represented as a collection of submatrices as: 4100 4101 .vb 4102 A B C 4103 D E F 4104 G H I 4105 .ve 4106 4107 Where the submatrices A,B,C are owned by proc0, D,E,F are 4108 owned by proc1, G,H,I are owned by proc2. 4109 4110 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4111 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4112 The 'M','N' parameters are 8,8, and have the same values on all procs. 4113 4114 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4115 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4116 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4117 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4118 part as SeqAIJ matrices. 
For example, proc1 will store [E] as one SeqAIJ
4119 matrix and [DF] as another SeqAIJ matrix.
4120
4121 When d_nz, o_nz parameters are specified, d_nz storage elements are
4122 allocated for every row of the local diagonal submatrix, and o_nz
4123 storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4124 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4125 local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4126 In this case, the values of d_nz,o_nz are:
4127 .vb
4128 proc0 : d_nz = 2, o_nz = 2
4129 proc1 : d_nz = 3, o_nz = 2
4130 proc2 : d_nz = 1, o_nz = 4
4131 .ve
4132 We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4133 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4134 for proc2, i.e., we are using 12+15+10=37 storage locations to store
4135 34 values.
4136
4137 When d_nnz, o_nnz parameters are specified, the storage is specified
4138 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4139 In the above case the values for d_nnz,o_nnz are:
4140 .vb
4141 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4142 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4143 proc2: d_nnz = [1,1] and o_nnz = [4,4]
4144 .ve
4145 Here the space allocated is the sum of all the above values, i.e., 34, and
4146 hence the preallocation is perfect.
4147
4148 Level: intermediate
4149
4150 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4151 MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4152 @*/
4153 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4154 {
4155 PetscErrorCode ierr;
4156
4157 PetscFunctionBegin;
4158 PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4159 PetscValidType(B,1);
4160 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4161 PetscFunctionReturn(0);
4162 }
4163
4164 /*@
4165 MatCreateMPIAIJWithArrays - creates an MPIAIJ matrix using arrays that contain the local rows
4166 in standard CSR format.
4167
4168 Collective
4169
4170 Input Parameters:
4171 + comm - MPI communicator
4172 . m - number of local rows (cannot be PETSC_DECIDE)
4173 . n - This value should be the same as the local size used in creating the
4174 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4175 calculated if N is given) For square matrices n is almost always m.
4176 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4177 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4178 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4179 . j - column indices
4180 - a - matrix values
4181
4182 Output Parameter:
4183 . mat - the matrix
4184
4185 Level: intermediate
4186
4187 Notes:
4188 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4189 thus you CANNOT change the matrix entries by changing the values of a[] after you have
4190 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4191
4192 The i and j indices are 0 based, and the i indices are indices into the local j array.
4193
4194 The format which is used for the sparse matrix input is equivalent to a
4195 row-major ordering,
i.e for the following matrix, the input data expected is 4196 as shown 4197 4198 Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays 4199 4200 $ 1 0 0 4201 $ 2 0 3 P0 4202 $ ------- 4203 $ 4 5 6 P1 4204 $ 4205 $ Process0 [P0]: rows_owned=[0,1] 4206 $ i = {0,1,3} [size = nrow+1 = 2+1] 4207 $ j = {0,0,2} [size = 3] 4208 $ v = {1,2,3} [size = 3] 4209 $ 4210 $ Process1 [P1]: rows_owned=[2] 4211 $ i = {0,3} [size = nrow+1 = 1+1] 4212 $ j = {0,1,2} [size = 3] 4213 $ v = {4,5,6} [size = 3] 4214 4215 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4216 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4217 @*/ 4218 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4219 { 4220 PetscErrorCode ierr; 4221 4222 PetscFunctionBegin; 4223 if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4224 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4225 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4226 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4227 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4228 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4229 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4230 PetscFunctionReturn(0); 4231 } 4232 4233 /*@ 4234 MatUpdateMPIAIJWithArrays - updates a MPI AIJ matrix using arrays that contain in standard 4235 CSR format for the local rows. Only the numerical values are updated the other arrays must be identical 4236 4237 Collective 4238 4239 Input Parameters: 4240 + mat - the matrix 4241 . m - number of local rows (Cannot be PETSC_DECIDE) 4242 . n - This value should be the same as the local size used in creating the 4243 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4244 calculated if N is given) For square matrices n is almost always m. 4245 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4246 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4247 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4248 . 
J - column indices
-   v - matrix values

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
@*/
PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscErrorCode ierr;
  PetscInt       cstart,nnz,i,j;
  PetscInt       *ld;
  PetscBool      nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data, *Ao = (Mat_SeqAIJ*)Aij->B->data;
  PetscScalar    *ad  = Ad->a, *ao = Ao->a;
  const PetscInt *Adi = Ad->i;
  PetscInt       ldi,Iii,md;

  PetscFunctionBegin;
  if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
  if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");

  cstart = mat->cmap->rstart;
  if (!Aij->ld) {
    /* count number of entries below block diagonal */
    ierr    = PetscCalloc1(m,&ld);CHKERRQ(ierr);
    Aij->ld = ld;
    for (i=0; i<m; i++) {
      nnz = Ii[i+1] - Ii[i];
      j   = 0;
      while (j < nnz && J[j] < cstart) {j++;}  /* test j < nnz before reading J[j] so we never run past this row */
      J    += nnz;
      ld[i] = j;
    }
  } else {
    ld = Aij->ld;
  }

  for (i=0; i<m; i++) {
    nnz = Ii[i+1] - Ii[i];
    Iii = Ii[i];
    ldi = ld[i];
    md  = Adi[i+1] - Adi[i];
    ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr);
    ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr);
    ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr);
    ad  += md;
    ao  += nnz - md;
  }
  nooffprocentries      = mat->nooffprocentries;
  mat->nooffprocentries = PETSC_TRUE;
  ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  mat->nooffprocentries = nooffprocentries;
  PetscFunctionReturn(0);
}

/*@C
   MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
          (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e., 'm'.

   Output Parameter:
.  A - the matrix

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored.

   The m, n, M, N parameters specify the size of the matrix, and its partitioning across
   processors, while the d_nz, d_nnz, o_nz, o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2, etc., where
   m0, m1, m2, ... are the input parameter 'm', i.e., each processor stores
   values corresponding to an [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to the 0th partition, the next n1 columns belonging to the next
   partition, etc., where n0, n1, n2, ... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m, n
   corresponding to the given processor, i.e., the diagonal matrix on
   process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
   etc. The remaining portion of the local submatrix, [m x (N-n)],
   constitutes the OFF-DIAGONAL portion. The example below better
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism
.vb
   MatCreate(...,&A);
   MatSetType(A,MATMPIAIJ);
   MatSetSizes(A, m,n,M,N);
   MatMPIAIJSetPreallocation(A,...);
.ve

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode  - Do not use inodes
-  -mat_inode_limit <limit> - Sets inode limit (max limit=5)

   Example usage:

   Consider the following 8x8 matrix with 34 nonzero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
   as follows

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.

   The 'm' parameters for proc0, proc1, proc2 are 3, 3, 2 respectively.
   The 'n' parameters for proc0, proc1, proc2 are 3, 3, 2 respectively.
   The 'M','N' parameters are 8, 8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0, proc1, proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0, proc1, proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz, o_nz are
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz, o_nnz are
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all of the above values, i.e., 34, and
   hence the preallocation is perfect.
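   As an illustrative sketch (not part of the original example; names are arbitrary), proc0 of
   the example above could create its share of the matrix with the d_nnz/o_nnz variant as
.vb
     Mat      A;
     PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};

     MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve
   The other processes make the matching call with their own local sizes and arrays; the d_nz
   and o_nz arguments are ignored here because the arrays are provided.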
4481 4482 Level: intermediate 4483 4484 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4485 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4486 @*/ 4487 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4488 { 4489 PetscErrorCode ierr; 4490 PetscMPIInt size; 4491 4492 PetscFunctionBegin; 4493 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4494 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4495 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4496 if (size > 1) { 4497 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4498 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4499 } else { 4500 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4501 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4502 } 4503 PetscFunctionReturn(0); 4504 } 4505 4506 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4507 { 4508 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4509 PetscBool flg; 4510 PetscErrorCode ierr; 4511 4512 PetscFunctionBegin; 4513 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr); 4514 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4515 if (Ad) *Ad = a->A; 4516 if (Ao) *Ao = a->B; 4517 if (colmap) *colmap = a->garray; 4518 PetscFunctionReturn(0); 4519 } 4520 4521 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4522 { 4523 PetscErrorCode ierr; 4524 PetscInt m,N,i,rstart,nnz,Ii; 4525 PetscInt *indx; 4526 PetscScalar *values; 4527 4528 PetscFunctionBegin; 4529 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4530 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4531 PetscInt *dnz,*onz,sum,bs,cbs; 4532 4533 if (n == PETSC_DECIDE) { 4534 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4535 } 4536 /* Check sum(n) = N */ 4537 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4538 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4539 4540 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4541 rstart -= m; 4542 4543 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4544 for (i=0; i<m; i++) { 4545 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4546 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4547 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4548 } 4549 4550 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4551 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4552 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4553 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4554 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4555 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4556 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4557 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4558 } 4559 4560 /* numeric phase */ 4561 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4562 for (i=0; i<m; i++) { 4563 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4564 Ii = i + rstart; 4565 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4566 ierr = 
MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4567 } 4568 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4569 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4570 PetscFunctionReturn(0); 4571 } 4572 4573 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4574 { 4575 PetscErrorCode ierr; 4576 PetscMPIInt rank; 4577 PetscInt m,N,i,rstart,nnz; 4578 size_t len; 4579 const PetscInt *indx; 4580 PetscViewer out; 4581 char *name; 4582 Mat B; 4583 const PetscScalar *values; 4584 4585 PetscFunctionBegin; 4586 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4587 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4588 /* Should this be the type of the diagonal block of A? */ 4589 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4590 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4591 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4592 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4593 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4594 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4595 for (i=0; i<m; i++) { 4596 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4597 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4598 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4599 } 4600 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4601 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4602 4603 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4604 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4605 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 4606 sprintf(name,"%s.%d",outfile,rank); 4607 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4608 ierr = PetscFree(name);CHKERRQ(ierr); 4609 ierr = MatView(B,out);CHKERRQ(ierr); 4610 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4611 ierr = MatDestroy(&B);CHKERRQ(ierr); 4612 PetscFunctionReturn(0); 4613 } 4614 4615 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4616 { 4617 PetscErrorCode ierr; 4618 Mat_Merge_SeqsToMPI *merge; 4619 PetscContainer container; 4620 4621 PetscFunctionBegin; 4622 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4623 if (container) { 4624 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4625 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4626 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4627 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4628 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4629 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4630 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4631 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4632 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4633 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4634 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4635 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4636 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4637 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4638 ierr = PetscFree(merge);CHKERRQ(ierr); 4639 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4640 } 4641 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4642 PetscFunctionReturn(0); 4643 } 4644 4645 #include <../src/mat/utils/freespace.h> 4646 #include <petscbt.h> 4647 4648 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4649 { 4650 PetscErrorCode ierr; 4651 MPI_Comm comm; 4652 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4653 PetscMPIInt 
size,rank,taga,*len_s; 4654 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4655 PetscInt proc,m; 4656 PetscInt **buf_ri,**buf_rj; 4657 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4658 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4659 MPI_Request *s_waits,*r_waits; 4660 MPI_Status *status; 4661 MatScalar *aa=a->a; 4662 MatScalar **abuf_r,*ba_i; 4663 Mat_Merge_SeqsToMPI *merge; 4664 PetscContainer container; 4665 4666 PetscFunctionBegin; 4667 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4668 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4669 4670 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4671 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4672 4673 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4674 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4675 4676 bi = merge->bi; 4677 bj = merge->bj; 4678 buf_ri = merge->buf_ri; 4679 buf_rj = merge->buf_rj; 4680 4681 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4682 owners = merge->rowmap->range; 4683 len_s = merge->len_s; 4684 4685 /* send and recv matrix values */ 4686 /*-----------------------------*/ 4687 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4688 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4689 4690 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4691 for (proc=0,k=0; proc<size; proc++) { 4692 if (!len_s[proc]) continue; 4693 i = owners[proc]; 4694 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4695 k++; 4696 } 4697 4698 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4699 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4700 ierr = PetscFree(status);CHKERRQ(ierr); 4701 4702 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4703 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4704 4705 /* insert mat values of mpimat */ 4706 /*----------------------------*/ 4707 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4708 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4709 4710 for (k=0; k<merge->nrecv; k++) { 4711 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4712 nrows = *(buf_ri_k[k]); 4713 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4714 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4715 } 4716 4717 /* set values of ba */ 4718 m = merge->rowmap->n; 4719 for (i=0; i<m; i++) { 4720 arow = owners[rank] + i; 4721 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4722 bnzi = bi[i+1] - bi[i]; 4723 ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr); 4724 4725 /* add local non-zero vals of this proc's seqmat into ba */ 4726 anzi = ai[arow+1] - ai[arow]; 4727 aj = a->j + ai[arow]; 4728 aa = a->a + ai[arow]; 4729 nextaj = 0; 4730 for (j=0; nextaj<anzi; j++) { 4731 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4732 ba_i[j] += aa[nextaj++]; 4733 } 4734 } 4735 4736 /* add received vals into ba */ 4737 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4738 /* i-th row */ 4739 if (i == *nextrow[k]) { 4740 anzi = *(nextai[k]+1) - *nextai[k]; 4741 aj = buf_rj[k] + *(nextai[k]); 4742 aa = abuf_r[k] + *(nextai[k]); 4743 nextaj = 0; 4744 for (j=0; nextaj<anzi; j++) { 4745 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4746 
ba_i[j] += aa[nextaj++]; 4747 } 4748 } 4749 nextrow[k]++; nextai[k]++; 4750 } 4751 } 4752 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4753 } 4754 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4755 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4756 4757 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4758 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4759 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4760 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4761 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4762 PetscFunctionReturn(0); 4763 } 4764 4765 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4766 { 4767 PetscErrorCode ierr; 4768 Mat B_mpi; 4769 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4770 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4771 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4772 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4773 PetscInt len,proc,*dnz,*onz,bs,cbs; 4774 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4775 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4776 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4777 MPI_Status *status; 4778 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4779 PetscBT lnkbt; 4780 Mat_Merge_SeqsToMPI *merge; 4781 PetscContainer container; 4782 4783 PetscFunctionBegin; 4784 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4785 4786 /* make sure it is a PETSc comm */ 4787 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4788 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4789 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4790 4791 ierr = PetscNew(&merge);CHKERRQ(ierr); 4792 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4793 4794 /* determine row ownership */ 4795 /*---------------------------------------------------------*/ 4796 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4797 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4798 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4799 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4800 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4801 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4802 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4803 4804 m = merge->rowmap->n; 4805 owners = merge->rowmap->range; 4806 4807 /* determine the number of messages to send, their lengths */ 4808 /*---------------------------------------------------------*/ 4809 len_s = merge->len_s; 4810 4811 len = 0; /* length of buf_si[] */ 4812 merge->nsend = 0; 4813 for (proc=0; proc<size; proc++) { 4814 len_si[proc] = 0; 4815 if (proc == rank) { 4816 len_s[proc] = 0; 4817 } else { 4818 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4819 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4820 } 4821 if (len_s[proc]) { 4822 merge->nsend++; 4823 nrows = 0; 4824 for (i=owners[proc]; i<owners[proc+1]; i++) { 4825 if (ai[i+1] > ai[i]) nrows++; 4826 } 4827 len_si[proc] = 2*(nrows+1); 4828 len += len_si[proc]; 4829 } 4830 } 4831 4832 /* determine the number and length of messages to receive for ij-structure */ 4833 /*-------------------------------------------------------------------------*/ 4834 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4835 ierr = 
PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4836 4837 /* post the Irecv of j-structure */ 4838 /*-------------------------------*/ 4839 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4840 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4841 4842 /* post the Isend of j-structure */ 4843 /*--------------------------------*/ 4844 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4845 4846 for (proc=0, k=0; proc<size; proc++) { 4847 if (!len_s[proc]) continue; 4848 i = owners[proc]; 4849 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4850 k++; 4851 } 4852 4853 /* receives and sends of j-structure are complete */ 4854 /*------------------------------------------------*/ 4855 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4856 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4857 4858 /* send and recv i-structure */ 4859 /*---------------------------*/ 4860 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4861 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4862 4863 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4864 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4865 for (proc=0,k=0; proc<size; proc++) { 4866 if (!len_s[proc]) continue; 4867 /* form outgoing message for i-structure: 4868 buf_si[0]: nrows to be sent 4869 [1:nrows]: row index (global) 4870 [nrows+1:2*nrows+1]: i-structure index 4871 */ 4872 /*-------------------------------------------*/ 4873 nrows = len_si[proc]/2 - 1; 4874 buf_si_i = buf_si + nrows+1; 4875 buf_si[0] = nrows; 4876 buf_si_i[0] = 0; 4877 nrows = 0; 4878 for (i=owners[proc]; i<owners[proc+1]; i++) { 4879 anzi = ai[i+1] - ai[i]; 4880 if (anzi) { 4881 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4882 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4883 nrows++; 4884 } 4885 } 4886 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4887 k++; 4888 buf_si += len_si[proc]; 4889 } 4890 4891 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4892 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4893 4894 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4895 for (i=0; i<merge->nrecv; i++) { 4896 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4897 } 4898 4899 ierr = PetscFree(len_si);CHKERRQ(ierr); 4900 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4901 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4902 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4903 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4904 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4905 ierr = PetscFree(status);CHKERRQ(ierr); 4906 4907 /* compute a local seq matrix in each processor */ 4908 /*----------------------------------------------*/ 4909 /* allocate bi array and free space for accumulating nonzero column info */ 4910 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4911 bi[0] = 0; 4912 4913 /* create and initialize a linked list */ 4914 nlnk = N+1; 4915 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4916 4917 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4918 len = ai[owners[rank+1]] - 
ai[owners[rank]]; 4919 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4920 4921 current_space = free_space; 4922 4923 /* determine symbolic info for each local row */ 4924 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4925 4926 for (k=0; k<merge->nrecv; k++) { 4927 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4928 nrows = *buf_ri_k[k]; 4929 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4930 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4931 } 4932 4933 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4934 len = 0; 4935 for (i=0; i<m; i++) { 4936 bnzi = 0; 4937 /* add local non-zero cols of this proc's seqmat into lnk */ 4938 arow = owners[rank] + i; 4939 anzi = ai[arow+1] - ai[arow]; 4940 aj = a->j + ai[arow]; 4941 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4942 bnzi += nlnk; 4943 /* add received col data into lnk */ 4944 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4945 if (i == *nextrow[k]) { /* i-th row */ 4946 anzi = *(nextai[k]+1) - *nextai[k]; 4947 aj = buf_rj[k] + *nextai[k]; 4948 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4949 bnzi += nlnk; 4950 nextrow[k]++; nextai[k]++; 4951 } 4952 } 4953 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4954 4955 /* if free space is not available, make more free space */ 4956 if (current_space->local_remaining<bnzi) { 4957 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 4958 nspacedouble++; 4959 } 4960 /* copy data into free space, then initialize lnk */ 4961 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4962 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4963 4964 current_space->array += bnzi; 4965 current_space->local_used += bnzi; 4966 current_space->local_remaining -= bnzi; 4967 4968 bi[i+1] = bi[i] + bnzi; 4969 } 4970 4971 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4972 4973 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4974 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4975 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4976 4977 /* create symbolic parallel matrix B_mpi */ 4978 /*---------------------------------------*/ 4979 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4980 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4981 if (n==PETSC_DECIDE) { 4982 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4983 } else { 4984 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4985 } 4986 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4987 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4988 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4989 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4990 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4991 4992 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4993 B_mpi->assembled = PETSC_FALSE; 4994 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4995 merge->bi = bi; 4996 merge->bj = bj; 4997 merge->buf_ri = buf_ri; 4998 merge->buf_rj = buf_rj; 4999 merge->coi = NULL; 5000 merge->coj = NULL; 5001 merge->owners_co = NULL; 5002 5003 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 5004 5005 /* attach the 
supporting struct to B_mpi for reuse */
  ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
  *mpimat = B_mpi;

  ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
      MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding the sequential
                 matrices from each processor

    Collective

   Input Parameters:
+    comm - the communicator on which the parallel matrix will live
.    seqmat - the input sequential matrix on each process
.    m - number of local rows (or PETSC_DECIDE)
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    mpimat - the parallel matrix generated

    Level: advanced

   Notes:
     The dimensions of the sequential matrix in each processor MUST be the same.
     The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
     destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
@*/
PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    if (scall == MAT_INITIAL_MATRIX) {
      ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
    } else {
      ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    }
    ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }
  ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
  }
  ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
     MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
          mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
          with MatGetSize()

    Not Collective

   Input Parameters:
+    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.
A_loc - the local sequential matrix generated 5078 5079 Level: developer 5080 5081 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed() 5082 5083 @*/ 5084 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5085 { 5086 PetscErrorCode ierr; 5087 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5088 Mat_SeqAIJ *mat,*a,*b; 5089 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 5090 MatScalar *aa,*ba,*cam; 5091 PetscScalar *ca; 5092 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5093 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5094 PetscBool match; 5095 MPI_Comm comm; 5096 PetscMPIInt size; 5097 5098 PetscFunctionBegin; 5099 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr); 5100 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5101 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5102 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5103 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 5104 5105 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5106 a = (Mat_SeqAIJ*)(mpimat->A)->data; 5107 b = (Mat_SeqAIJ*)(mpimat->B)->data; 5108 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 5109 aa = a->a; ba = b->a; 5110 if (scall == MAT_INITIAL_MATRIX) { 5111 if (size == 1) { 5112 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 5113 PetscFunctionReturn(0); 5114 } 5115 5116 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 5117 ci[0] = 0; 5118 for (i=0; i<am; i++) { 5119 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5120 } 5121 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 5122 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 5123 k = 0; 5124 for (i=0; i<am; i++) { 5125 ncols_o = bi[i+1] - bi[i]; 5126 ncols_d = ai[i+1] - ai[i]; 5127 /* off-diagonal portion of A */ 5128 for (jo=0; jo<ncols_o; jo++) { 5129 col = cmap[*bj]; 5130 if (col >= cstart) break; 5131 cj[k] = col; bj++; 5132 ca[k++] = *ba++; 5133 } 5134 /* diagonal portion of A */ 5135 for (j=0; j<ncols_d; j++) { 5136 cj[k] = cstart + *aj++; 5137 ca[k++] = *aa++; 5138 } 5139 /* off-diagonal portion of A */ 5140 for (j=jo; j<ncols_o; j++) { 5141 cj[k] = cmap[*bj++]; 5142 ca[k++] = *ba++; 5143 } 5144 } 5145 /* put together the new matrix */ 5146 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5147 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5148 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5149 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5150 mat->free_a = PETSC_TRUE; 5151 mat->free_ij = PETSC_TRUE; 5152 mat->nonew = 0; 5153 } else if (scall == MAT_REUSE_MATRIX) { 5154 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5155 ci = mat->i; cj = mat->j; cam = mat->a; 5156 for (i=0; i<am; i++) { 5157 /* off-diagonal portion of A */ 5158 ncols_o = bi[i+1] - bi[i]; 5159 for (jo=0; jo<ncols_o; jo++) { 5160 col = cmap[*bj]; 5161 if (col >= cstart) break; 5162 *cam++ = *ba++; bj++; 5163 } 5164 /* diagonal portion of A */ 5165 ncols_d = ai[i+1] - ai[i]; 5166 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5167 /* off-diagonal portion of A */ 5168 for (j=jo; j<ncols_o; j++) { 5169 *cam++ = *ba++; bj++; 5170 } 5171 } 5172 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5173 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5174 PetscFunctionReturn(0); 5175 } 5176 5177 /*@C 5178 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5179 5180 Not Collective 5181 5182 Input Parameters: 5183 + A - the matrix 5184 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5185 - row, col - index sets of rows and columns to extract (or NULL) 5186 5187 Output Parameter: 5188 . A_loc - the local sequential matrix generated 5189 5190 Level: developer 5191 5192 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5193 5194 @*/ 5195 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5196 { 5197 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5198 PetscErrorCode ierr; 5199 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5200 IS isrowa,iscola; 5201 Mat *aloc; 5202 PetscBool match; 5203 5204 PetscFunctionBegin; 5205 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5206 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5207 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5208 if (!row) { 5209 start = A->rmap->rstart; end = A->rmap->rend; 5210 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5211 } else { 5212 isrowa = *row; 5213 } 5214 if (!col) { 5215 start = A->cmap->rstart; 5216 cmap = a->garray; 5217 nzA = a->A->cmap->n; 5218 nzB = a->B->cmap->n; 5219 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5220 ncols = 0; 5221 for (i=0; i<nzB; i++) { 5222 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5223 else break; 5224 } 5225 imark = i; 5226 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5227 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5228 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5229 } else { 5230 iscola = *col; 5231 } 5232 if (scall != MAT_INITIAL_MATRIX) { 5233 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5234 aloc[0] = *A_loc; 5235 } 5236 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5237 if (!col) { /* attach global id of condensed columns */ 5238 ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr); 5239 } 5240 *A_loc = aloc[0]; 5241 ierr = PetscFree(aloc);CHKERRQ(ierr); 5242 if (!row) { 5243 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5244 } 5245 if (!col) { 5246 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5247 } 5248 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5249 PetscFunctionReturn(0); 5250 } 5251 5252 /*@C 5253 MatGetBrowsOfAcols - Creates a 
SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5254 5255 Collective on Mat 5256 5257 Input Parameters: 5258 + A,B - the matrices in mpiaij format 5259 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5260 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5261 5262 Output Parameter: 5263 + rowb, colb - index sets of rows and columns of B to extract 5264 - B_seq - the sequential matrix generated 5265 5266 Level: developer 5267 5268 @*/ 5269 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5270 { 5271 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5272 PetscErrorCode ierr; 5273 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5274 IS isrowb,iscolb; 5275 Mat *bseq=NULL; 5276 5277 PetscFunctionBegin; 5278 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5279 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5280 } 5281 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5282 5283 if (scall == MAT_INITIAL_MATRIX) { 5284 start = A->cmap->rstart; 5285 cmap = a->garray; 5286 nzA = a->A->cmap->n; 5287 nzB = a->B->cmap->n; 5288 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5289 ncols = 0; 5290 for (i=0; i<nzB; i++) { /* row < local row index */ 5291 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5292 else break; 5293 } 5294 imark = i; 5295 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5296 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5297 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5298 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5299 } else { 5300 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5301 isrowb = *rowb; iscolb = *colb; 5302 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5303 bseq[0] = *B_seq; 5304 } 5305 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5306 *B_seq = bseq[0]; 5307 ierr = PetscFree(bseq);CHKERRQ(ierr); 5308 if (!rowb) { 5309 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5310 } else { 5311 *rowb = isrowb; 5312 } 5313 if (!colb) { 5314 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5315 } else { 5316 *colb = iscolb; 5317 } 5318 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5319 PetscFunctionReturn(0); 5320 } 5321 5322 /* 5323 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5324 of the OFF-DIAGONAL portion of local A 5325 5326 Collective on Mat 5327 5328 Input Parameters: 5329 + A,B - the matrices in mpiaij format 5330 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5331 5332 Output Parameter: 5333 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5334 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5335 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5336 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5337 5338 Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product 5339 for this matrix. This is not desirable.. 
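    As a hedged sketch (the variable names are illustrative, and this is not necessarily how the
    PETSc product routines themselves drive it), a caller that wants to reuse the communication
    pattern passes the previously returned arrays and matrix back in with MAT_REUSE_MATRIX:
.vb
      PetscInt  *startsj_s = NULL,*startsj_r = NULL;
      MatScalar *bufa = NULL;
      Mat        B_oth;

      MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);
      MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);
.ve
    On reuse only the numerical values of B_oth are re-communicated; the saved index and buffer
    arrays describe the message layout.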
5340 5341 Level: developer 5342 5343 */ 5344 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5345 { 5346 PetscErrorCode ierr; 5347 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5348 Mat_SeqAIJ *b_oth; 5349 VecScatter ctx; 5350 MPI_Comm comm; 5351 const PetscMPIInt *rprocs,*sprocs; 5352 const PetscInt *srow,*rstarts,*sstarts; 5353 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5354 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = 0,*sstartsj,len; 5355 PetscScalar *b_otha,*bufa,*bufA,*vals; 5356 MPI_Request *rwaits = NULL,*swaits = NULL; 5357 MPI_Status rstatus; 5358 PetscMPIInt jj,size,tag,rank,nsends_mpi,nrecvs_mpi; 5359 5360 PetscFunctionBegin; 5361 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5362 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5363 5364 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5365 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5366 } 5367 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5368 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5369 5370 if (size == 1) { 5371 startsj_s = NULL; 5372 bufa_ptr = NULL; 5373 *B_oth = NULL; 5374 PetscFunctionReturn(0); 5375 } 5376 5377 ctx = a->Mvctx; 5378 tag = ((PetscObject)ctx)->tag; 5379 5380 if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use"); 5381 ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5382 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5383 ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr); 5384 ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr); 5385 ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr); 5386 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5387 5388 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5389 if (scall == MAT_INITIAL_MATRIX) { 5390 /* i-array */ 5391 /*---------*/ 5392 /* post receives */ 5393 if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */ 5394 for (i=0; i<nrecvs; i++) { 5395 rowlen = rvalues + rstarts[i]*rbs; 5396 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5397 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5398 } 5399 5400 /* pack the outgoing message */ 5401 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5402 5403 sstartsj[0] = 0; 5404 rstartsj[0] = 0; 5405 len = 0; /* total length of j or a array to be sent */ 5406 if (nsends) { 5407 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5408 ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr); 5409 } 5410 for (i=0; i<nsends; i++) { 5411 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5412 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5413 for (j=0; j<nrows; j++) { 5414 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5415 for (l=0; l<sbs; l++) { 5416 ierr = 
MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5417 5418 rowlen[j*sbs+l] = ncols; 5419 5420 len += ncols; 5421 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5422 } 5423 k++; 5424 } 5425 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5426 5427 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5428 } 5429 /* recvs and sends of i-array are completed */ 5430 i = nrecvs; 5431 while (i--) { 5432 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5433 } 5434 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5435 ierr = PetscFree(svalues);CHKERRQ(ierr); 5436 5437 /* allocate buffers for sending j and a arrays */ 5438 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5439 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5440 5441 /* create i-array of B_oth */ 5442 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5443 5444 b_othi[0] = 0; 5445 len = 0; /* total length of j or a array to be received */ 5446 k = 0; 5447 for (i=0; i<nrecvs; i++) { 5448 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 5449 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 5450 for (j=0; j<nrows; j++) { 5451 b_othi[k+1] = b_othi[k] + rowlen[j]; 5452 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5453 k++; 5454 } 5455 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5456 } 5457 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5458 5459 /* allocate space for j and a arrrays of B_oth */ 5460 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5461 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5462 5463 /* j-array */ 5464 /*---------*/ 5465 /* post receives of j-array */ 5466 for (i=0; i<nrecvs; i++) { 5467 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5468 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5469 } 5470 5471 /* pack the outgoing message j-array */ 5472 if (nsends) k = sstarts[0]; 5473 for (i=0; i<nsends; i++) { 5474 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5475 bufJ = bufj+sstartsj[i]; 5476 for (j=0; j<nrows; j++) { 5477 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5478 for (ll=0; ll<sbs; ll++) { 5479 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5480 for (l=0; l<ncols; l++) { 5481 *bufJ++ = cols[l]; 5482 } 5483 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5484 } 5485 } 5486 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5487 } 5488 5489 /* recvs and sends of j-array are completed */ 5490 i = nrecvs; 5491 while (i--) { 5492 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5493 } 5494 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5495 } else if (scall == MAT_REUSE_MATRIX) { 5496 sstartsj = *startsj_s; 5497 rstartsj = *startsj_r; 5498 bufa = *bufa_ptr; 5499 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5500 b_otha = b_oth->a; 5501 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5502 5503 /* a-array */ 5504 /*---------*/ 5505 /* post receives of a-array */ 5506 for (i=0; i<nrecvs; i++) { 5507 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5508 ierr = 
MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5509 } 5510 5511 /* pack the outgoing message a-array */ 5512 if (nsends) k = sstarts[0]; 5513 for (i=0; i<nsends; i++) { 5514 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5515 bufA = bufa+sstartsj[i]; 5516 for (j=0; j<nrows; j++) { 5517 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5518 for (ll=0; ll<sbs; ll++) { 5519 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5520 for (l=0; l<ncols; l++) { 5521 *bufA++ = vals[l]; 5522 } 5523 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5524 } 5525 } 5526 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5527 } 5528 /* recvs and sends of a-array are completed */ 5529 i = nrecvs; 5530 while (i--) { 5531 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5532 } 5533 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5534 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5535 5536 if (scall == MAT_INITIAL_MATRIX) { 5537 /* put together the new matrix */ 5538 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5539 5540 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5541 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5542 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5543 b_oth->free_a = PETSC_TRUE; 5544 b_oth->free_ij = PETSC_TRUE; 5545 b_oth->nonew = 0; 5546 5547 ierr = PetscFree(bufj);CHKERRQ(ierr); 5548 if (!startsj_s || !bufa_ptr) { 5549 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5550 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5551 } else { 5552 *startsj_s = sstartsj; 5553 *startsj_r = rstartsj; 5554 *bufa_ptr = bufa; 5555 } 5556 } 5557 5558 ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5559 ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr); 5560 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5561 PetscFunctionReturn(0); 5562 } 5563 5564 /*@C 5565 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5566 5567 Not Collective 5568 5569 Input Parameters: 5570 . A - The matrix in mpiaij format 5571 5572 Output Parameter: 5573 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5574 . 
colmap - A map from global column index to local index into lvec 5575 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5576 5577 Level: developer 5578 5579 @*/ 5580 #if defined(PETSC_USE_CTABLE) 5581 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5582 #else 5583 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5584 #endif 5585 { 5586 Mat_MPIAIJ *a; 5587 5588 PetscFunctionBegin; 5589 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5590 PetscValidPointer(lvec, 2); 5591 PetscValidPointer(colmap, 3); 5592 PetscValidPointer(multScatter, 4); 5593 a = (Mat_MPIAIJ*) A->data; 5594 if (lvec) *lvec = a->lvec; 5595 if (colmap) *colmap = a->colmap; 5596 if (multScatter) *multScatter = a->Mvctx; 5597 PetscFunctionReturn(0); 5598 } 5599 5600 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5601 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5602 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*); 5603 #if defined(PETSC_HAVE_MKL_SPARSE) 5604 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5605 #endif 5606 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5607 #if defined(PETSC_HAVE_ELEMENTAL) 5608 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5609 #endif 5610 #if defined(PETSC_HAVE_HYPRE) 5611 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5612 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*); 5613 #endif 5614 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 5615 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5616 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*); 5617 5618 /* 5619 Computes (B'*A')' since computing B*A directly is untenable 5620 5621 n p p 5622 ( ) ( ) ( ) 5623 m ( A ) * n ( B ) = m ( C ) 5624 ( ) ( ) ( ) 5625 5626 */ 5627 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5628 { 5629 PetscErrorCode ierr; 5630 Mat At,Bt,Ct; 5631 5632 PetscFunctionBegin; 5633 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5634 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5635 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5636 ierr = MatDestroy(&At);CHKERRQ(ierr); 5637 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5638 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5639 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5640 PetscFunctionReturn(0); 5641 } 5642 5643 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5644 { 5645 PetscErrorCode ierr; 5646 PetscInt m=A->rmap->n,n=B->cmap->n; 5647 Mat Cmat; 5648 5649 PetscFunctionBegin; 5650 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5651 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5652 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5653 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5654 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5655 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5656 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5657 ierr = 
MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5658 5659 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5660 5661 *C = Cmat; 5662 PetscFunctionReturn(0); 5663 } 5664 5665 /* ----------------------------------------------------------------*/ 5666 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5667 { 5668 PetscErrorCode ierr; 5669 5670 PetscFunctionBegin; 5671 if (scall == MAT_INITIAL_MATRIX) { 5672 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5673 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5674 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5675 } 5676 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5677 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5678 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5679 PetscFunctionReturn(0); 5680 } 5681 5682 /*MC 5683 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5684 5685 Options Database Keys: 5686 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5687 5688 Level: beginner 5689 5690 .seealso: MatCreateAIJ() 5691 M*/ 5692 5693 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5694 { 5695 Mat_MPIAIJ *b; 5696 PetscErrorCode ierr; 5697 PetscMPIInt size; 5698 5699 PetscFunctionBegin; 5700 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5701 5702 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5703 B->data = (void*)b; 5704 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5705 B->assembled = PETSC_FALSE; 5706 B->insertmode = NOT_SET_VALUES; 5707 b->size = size; 5708 5709 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5710 5711 /* build cache for off array entries formed */ 5712 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5713 5714 b->donotstash = PETSC_FALSE; 5715 b->colmap = 0; 5716 b->garray = 0; 5717 b->roworiented = PETSC_TRUE; 5718 5719 /* stuff used for matrix vector multiply */ 5720 b->lvec = NULL; 5721 b->Mvctx = NULL; 5722 5723 /* stuff for MatGetRow() */ 5724 b->rowindices = 0; 5725 b->rowvalues = 0; 5726 b->getrowactive = PETSC_FALSE; 5727 5728 /* flexible pointer used in CUSP/CUSPARSE classes */ 5729 b->spptr = NULL; 5730 5731 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 5732 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5733 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5734 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5735 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5736 ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr); 5737 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5738 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5739 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5740 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr); 5741 #if defined(PETSC_HAVE_MKL_SPARSE) 5742 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr); 5743 #endif 5744 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5745 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5746 #if defined(PETSC_HAVE_ELEMENTAL) 5747 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 5748 #endif 5749 #if defined(PETSC_HAVE_HYPRE) 5750 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 5751 #endif 5752 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 5753 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr); 5754 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5755 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5756 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5757 #if defined(PETSC_HAVE_HYPRE) 5758 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr); 5759 #endif 5760 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr); 5761 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5762 PetscFunctionReturn(0); 5763 } 5764 5765 /*@C 5766 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5767 and "off-diagonal" part of the matrix in CSR format. 5768 5769 Collective 5770 5771 Input Parameters: 5772 + comm - MPI communicator 5773 . m - number of local rows (Cannot be PETSC_DECIDE) 5774 . n - This value should be the same as the local size used in creating the 5775 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5776 calculated if N is given) For square matrices n is almost always m. 5777 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5778 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5779 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 5780 . j - column indices 5781 . a - matrix values 5782 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 5783 . oj - column indices 5784 - oa - matrix values 5785 5786 Output Parameter: 5787 . mat - the matrix 5788 5789 Level: advanced 5790 5791 Notes: 5792 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5793 must free the arrays once the matrix has been destroyed and not before. 
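   As a minimal illustrative sketch (the numbers are invented for this manual page, not taken
   from an application), consider a two-process communicator and a 4x4 global matrix where this
   process owns rows 0-1 and columns 0-1, its local rows being

.vb
      1  2  |  0  3
      0  4  |  5  0
.ve

   It would pass the split CSR arrays

.vb
      PetscInt    i[]  = {0,2,3},  j[]  = {0,1,1};   PetscScalar a[]  = {1,2,4};
      PetscInt    oi[] = {0,1,2},  oj[] = {3,2};     PetscScalar oa[] = {3,5};

      MatCreateMPIAIJWithSplitArrays(comm,2,2,4,4,i,j,a,oi,oj,oa,&A);
.ve

   while the other process makes the analogous call for rows 2 and 3. Note that the off-diagonal
   column indices oj are global column indices, since the off-diagonal block is created below with
   the full global number of columns.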

       The i and j indices are 0 based

       See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix

       This sets local rows and cannot be used to set off-processor values.

       Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
       legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
       not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
       the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
       keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
       communication if it is known that only local entries will be set.

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
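/*
   Illustrative sketch (not part of the PETSc sources) of a call to MatCreateMPIAIJWithSplitArrays().
   It assumes a run on exactly two processes, each owning 2 rows and 2 columns of a 4x4 matrix; shown
   are the arrays rank 0 would pass (rank 1 would pass the mirrored data). Since the "diagonal" block
   is created above as an m-by-n sequential matrix, its j[] indices are local to the owned columns,
   while the "off-diagonal" block is created with the global column count, so its oj[] indices are
   global. The numerical values are arbitrary placeholders.

     PetscInt       i[]  = {0,1,2}, j[]  = {0,1};   // diagonal block: entries (0,0) and (1,1)
     PetscScalar    a[]  = {1.0,2.0};
     PetscInt       oi[] = {0,0,1}, oj[] = {2};     // off-diagonal block: local row 1, global column 2
     PetscScalar    oa[] = {3.0};
     Mat            A;
     PetscErrorCode ierr;

     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,4,4,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
     // ... use A; the six arrays may be freed only after the matrix is destroyed ...
     ierr = MatDestroy(&A);CHKERRQ(ierr);
*/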
/*
    Special version for direct calls from Fortran
*/
#include <petsc/private/fortranimpl.h>

/* Change these macros so can be used in void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ3
#define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)

#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif
PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;

#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
#endif
  {
    PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat        A                 = aij->A;
    Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa               = a->a;
    PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B                 = aij->B;
    Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba               = b->a;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (in[j] >= cstart && in[j] < cend) {
            col = in[j] - cstart;
            if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
          else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
#endif
          else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined(PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
                ba    = b->a;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          }
        }
      } else if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}