1 2 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 3 #include <petsc-private/vecimpl.h> 4 #include <petscblaslapack.h> 5 #include <petscsf.h> 6 7 /*MC 8 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 9 10 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 11 and MATMPIAIJ otherwise. As a result, for single process communicators, 12 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 13 for communicators controlling multiple processes. It is recommended that you call both of 14 the above preallocation routines for simplicity. 15 16 Options Database Keys: 17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 18 19 Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when 20 enough exist. 21 22 Level: beginner 23 24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ 25 M*/ 26 27 /*MC 28 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 29 30 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 31 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 32 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 33 for communicators controlling multiple processes. It is recommended that you call both of 34 the above preallocation routines for simplicity. 35 36 Options Database Keys: 37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 38 39 Level: beginner 40 41 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 42 M*/ 43 44 #undef __FUNCT__ 45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ" 46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 47 { 48 PetscErrorCode ierr; 49 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 50 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 51 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 52 const PetscInt *ia,*ib; 53 const MatScalar *aa,*bb; 54 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 55 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 56 57 PetscFunctionBegin; 58 *keptrows = 0; 59 ia = a->i; 60 ib = b->i; 61 for (i=0; i<m; i++) { 62 na = ia[i+1] - ia[i]; 63 nb = ib[i+1] - ib[i]; 64 if (!na && !nb) { 65 cnt++; 66 goto ok1; 67 } 68 aa = a->a + ia[i]; 69 for (j=0; j<na; j++) { 70 if (aa[j] != 0.0) goto ok1; 71 } 72 bb = b->a + ib[i]; 73 for (j=0; j <nb; j++) { 74 if (bb[j] != 0.0) goto ok1; 75 } 76 cnt++; 77 ok1:; 78 } 79 ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 80 if (!n0rows) PetscFunctionReturn(0); 81 ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr); 82 cnt = 0; 83 for (i=0; i<m; i++) { 84 na = ia[i+1] - ia[i]; 85 nb = ib[i+1] - ib[i]; 86 if (!na && !nb) continue; 87 aa = a->a + ia[i]; 88 for (j=0; j<na;j++) { 89 if (aa[j] != 0.0) { 90 rows[cnt++] = rstart + i; 91 goto ok2; 92 } 93 } 94 bb = b->a + ib[i]; 95 for (j=0; j<nb; j++) { 96 if (bb[j] != 0.0) { 97 rows[cnt++] = rstart + i; 98 goto ok2; 99 } 100 } 101 ok2:; 102 } 103 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 104 PetscFunctionReturn(0); 105 } 106 107 #undef __FUNCT__ 108 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ" 109 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 110 { 111 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 112 PetscErrorCode ierr; 113 PetscInt i,rstart,nrows,*rows; 114 115 PetscFunctionBegin; 116 *zrows = NULL; 117 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 118 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 119 for (i=0; i<nrows; i++) rows[i] += rstart; 120 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 121 PetscFunctionReturn(0); 122 } 123 124 #undef __FUNCT__ 125 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 126 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 127 { 128 PetscErrorCode ierr; 129 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 130 PetscInt i,n,*garray = aij->garray; 131 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 132 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 133 PetscReal *work; 134 135 PetscFunctionBegin; 136 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 137 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 138 if (type == NORM_2) { 139 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 140 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 141 } 142 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 143 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 144 } 145 } else if (type == NORM_1) { 146 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 147 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 148 } 149 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 150 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 151 } 152 } else if (type == NORM_INFINITY) { 153 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 154 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 155 } 156 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 157 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 158 } 159 160 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 161 if (type == NORM_INFINITY) { 162 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 163 } else { 164 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 165 } 166 ierr = PetscFree(work);CHKERRQ(ierr); 167 if (type == NORM_2) { 168 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 169 } 170 PetscFunctionReturn(0); 171 } 172 173 #undef __FUNCT__ 174 #define __FUNCT__ "MatDistribute_MPIAIJ" 175 /* 176 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 177 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 178 179 Only for square matrices 180 181 Used by a preconditioner, hence PETSC_EXTERN 182 */ 183 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 184 { 185 PetscMPIInt rank,size; 186 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 187 PetscErrorCode ierr; 188 Mat mat; 189 Mat_SeqAIJ *gmata; 190 PetscMPIInt tag; 191 MPI_Status status; 192 PetscBool aij; 193 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 194 195 PetscFunctionBegin; 196 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 197 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 198 if (!rank) { 199 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 200 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 201 } 202 if (reuse == MAT_INITIAL_MATRIX) { 203 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 204 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 205 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 206 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 207 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 208 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 209 ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr); 210 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 211 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 212 213 rowners[0] = 0; 214 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 215 rstart = rowners[rank]; 216 rend = rowners[rank+1]; 217 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 218 if (!rank) { 219 gmata = (Mat_SeqAIJ*) gmat->data; 220 /* send row lengths to all processors */ 221 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 222 for (i=1; i<size; i++) { 223 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 224 } 225 /* determine number diagonal and off-diagonal counts */ 226 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 227 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 228 jj = 0; 229 for (i=0; i<m; i++) { 230 for (j=0; j<dlens[i]; j++) { 231 if (gmata->j[jj] < rstart) ld[i]++; 232 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 233 jj++; 234 } 235 } 236 /* send column indices to other processes */ 237 for (i=1; i<size; i++) { 238 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 239 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 240 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 241 } 242 243 /* send numerical values to other processes */ 244 for (i=1; i<size; i++) { 245 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 246 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 247 } 248 gmataa = gmata->a; 249 gmataj = gmata->j; 250 251 } else { 252 /* receive row lengths */ 253 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 254 /* receive column indices */ 255 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 256 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 257 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 258 /* determine number diagonal and off-diagonal counts */ 259 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 260 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 261 jj = 0; 262 for (i=0; i<m; i++) { 263 for (j=0; j<dlens[i]; j++) { 264 if (gmataj[jj] < rstart) ld[i]++; 265 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 266 jj++; 267 } 268 } 269 /* receive numerical values */ 270 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 271 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 272 } 273 /* set preallocation */ 274 for (i=0; i<m; i++) { 275 dlens[i] -= olens[i]; 276 } 277 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 278 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 279 280 for (i=0; i<m; i++) { 281 dlens[i] += olens[i]; 282 } 283 cnt = 0; 284 for (i=0; i<m; i++) { 285 row = rstart + i; 286 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 287 cnt += dlens[i]; 288 } 289 if (rank) { 290 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 291 } 292 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 293 ierr = PetscFree(rowners);CHKERRQ(ierr); 294 295 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 296 297 *inmat = mat; 298 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 299 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 300 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 301 mat = *inmat; 302 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 303 if (!rank) { 304 /* send numerical values to other processes */ 305 gmata = (Mat_SeqAIJ*) gmat->data; 306 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 307 gmataa = gmata->a; 308 for (i=1; i<size; i++) { 309 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 310 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 311 } 312 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 313 } else { 314 /* receive numerical values from process 0*/ 315 nz = Ad->nz + Ao->nz; 316 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 317 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 318 } 319 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 320 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 321 ad = Ad->a; 322 ao = Ao->a; 323 if (mat->rmap->n) { 324 i = 0; 325 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 326 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 327 } 328 for (i=1; i<mat->rmap->n; i++) { 329 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 330 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 331 } 332 i--; 333 if (mat->rmap->n) { 334 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 335 } 336 if (rank) { 337 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 338 } 339 } 340 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 341 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 342 PetscFunctionReturn(0); 343 } 344 345 /* 346 Local utility routine that creates a mapping from the global column 347 number to the local number in the off-diagonal part of the local 348 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 349 a slightly higher hash table cost; without it it is not scalable (each processor 350 has an order N integer array but is fast to acess. 351 */ 352 #undef __FUNCT__ 353 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 354 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 355 { 356 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 357 PetscErrorCode ierr; 358 PetscInt n = aij->B->cmap->n,i; 359 360 PetscFunctionBegin; 361 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 362 #if defined(PETSC_USE_CTABLE) 363 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 364 for (i=0; i<n; i++) { 365 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 366 } 367 #else 368 ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr); 369 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 370 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 371 #endif 372 PetscFunctionReturn(0); 373 } 374 375 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 376 { \ 377 if (col <= lastcol1) low1 = 0; \ 378 else high1 = nrow1; \ 379 lastcol1 = col;\ 380 while (high1-low1 > 5) { \ 381 t = (low1+high1)/2; \ 382 if (rp1[t] > col) high1 = t; \ 383 else low1 = t; \ 384 } \ 385 for (_i=low1; _i<high1; _i++) { \ 386 if (rp1[_i] > col) break; \ 387 if (rp1[_i] == col) { \ 388 if (addv == ADD_VALUES) ap1[_i] += value; \ 389 else ap1[_i] = value; \ 390 goto a_noinsert; \ 391 } \ 392 } \ 393 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 394 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 395 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 396 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 397 N = nrow1++ - 1; a->nz++; high1++; \ 398 /* shift up all the later entries in this row */ \ 399 for (ii=N; ii>=_i; ii--) { \ 400 rp1[ii+1] = rp1[ii]; \ 401 ap1[ii+1] = ap1[ii]; \ 402 } \ 403 rp1[_i] = col; \ 404 ap1[_i] = value; \ 405 a_noinsert: ; \ 406 ailen[row] = nrow1; \ 407 } 408 409 410 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 411 { \ 412 if (col <= lastcol2) low2 = 0; \ 413 else high2 = nrow2; \ 414 lastcol2 = col; \ 415 while (high2-low2 > 5) { \ 416 t = (low2+high2)/2; \ 417 if (rp2[t] > col) high2 = t; \ 418 else low2 = t; \ 419 } \ 420 for (_i=low2; _i<high2; _i++) { \ 421 if (rp2[_i] > col) break; \ 422 if (rp2[_i] == col) { \ 423 if (addv == ADD_VALUES) ap2[_i] += value; \ 424 else ap2[_i] = value; \ 425 goto b_noinsert; \ 426 } \ 427 } \ 428 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 429 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 430 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 431 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 432 N = nrow2++ - 1; b->nz++; high2++; \ 433 /* shift up all the later entries in this row */ \ 434 for (ii=N; ii>=_i; ii--) { \ 435 rp2[ii+1] = rp2[ii]; \ 436 ap2[ii+1] = ap2[ii]; \ 437 } \ 438 rp2[_i] = col; \ 439 ap2[_i] = value; \ 440 b_noinsert: ; \ 441 bilen[row] = nrow2; \ 442 } 443 444 #undef __FUNCT__ 445 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 446 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 447 { 448 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 449 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 450 PetscErrorCode ierr; 451 PetscInt l,*garray = mat->garray,diag; 452 453 PetscFunctionBegin; 454 /* code only works for square matrices A */ 455 456 /* find size of row to the left of the diagonal part */ 457 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 458 row = row - diag; 459 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 460 if (garray[b->j[b->i[row]+l]] > diag) break; 461 } 462 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 463 464 /* diagonal part */ 465 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 466 467 /* right of diagonal part */ 468 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 469 PetscFunctionReturn(0); 470 } 471 472 #undef __FUNCT__ 473 #define __FUNCT__ "MatSetValues_MPIAIJ" 474 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 475 { 476 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 477 PetscScalar value; 478 PetscErrorCode ierr; 479 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 480 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 481 PetscBool roworiented = aij->roworiented; 482 483 /* Some Variables required in the macro */ 484 Mat A = aij->A; 485 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 486 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 487 MatScalar *aa = a->a; 488 PetscBool ignorezeroentries = a->ignorezeroentries; 489 Mat B = aij->B; 490 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 491 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 492 MatScalar *ba = b->a; 493 494 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 495 PetscInt nonew; 496 MatScalar *ap1,*ap2; 497 498 PetscFunctionBegin; 499 if (v) PetscValidScalarPointer(v,6); 500 for (i=0; i<m; i++) { 501 if (im[i] < 0) continue; 502 #if defined(PETSC_USE_DEBUG) 503 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 504 #endif 505 if (im[i] >= rstart && im[i] < rend) { 506 row = im[i] - rstart; 507 lastcol1 = -1; 508 rp1 = aj + ai[row]; 509 ap1 = aa + ai[row]; 510 rmax1 = aimax[row]; 511 nrow1 = ailen[row]; 512 low1 = 0; 513 high1 = nrow1; 514 lastcol2 = -1; 515 rp2 = bj + bi[row]; 516 ap2 = ba + bi[row]; 517 rmax2 = bimax[row]; 518 nrow2 = bilen[row]; 519 low2 = 0; 520 high2 = nrow2; 521 522 for (j=0; j<n; j++) { 523 if (v) { 524 if (roworiented) value = v[i*n+j]; 525 else value = v[i+j*m]; 526 } else value = 0.0; 527 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 528 if (in[j] >= cstart && in[j] < cend) { 529 col = in[j] - cstart; 530 nonew = a->nonew; 531 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 532 } else if (in[j] < 0) continue; 533 #if defined(PETSC_USE_DEBUG) 534 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 535 #endif 536 else { 537 if (mat->was_assembled) { 538 if (!aij->colmap) { 539 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 540 } 541 #if defined(PETSC_USE_CTABLE) 542 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 543 col--; 544 #else 545 col = aij->colmap[in[j]] - 1; 546 #endif 547 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 548 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 549 col = in[j]; 550 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 551 B = aij->B; 552 b = (Mat_SeqAIJ*)B->data; 553 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 554 rp2 = bj + bi[row]; 555 ap2 = ba + bi[row]; 556 rmax2 = bimax[row]; 557 nrow2 = bilen[row]; 558 low2 = 0; 559 high2 = nrow2; 560 bm = aij->B->rmap->n; 561 ba = b->a; 562 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 563 } else col = in[j]; 564 nonew = b->nonew; 565 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 566 } 567 } 568 } else { 569 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 570 if (!aij->donotstash) { 571 mat->assembled = PETSC_FALSE; 572 if (roworiented) { 573 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 574 } else { 575 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 576 } 577 } 578 } 579 } 580 PetscFunctionReturn(0); 581 } 582 583 #undef __FUNCT__ 584 #define __FUNCT__ "MatGetValues_MPIAIJ" 585 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 586 { 587 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 588 PetscErrorCode ierr; 589 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 590 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 591 592 PetscFunctionBegin; 593 for (i=0; i<m; i++) { 594 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 595 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 596 if (idxm[i] >= rstart && idxm[i] < rend) { 597 row = idxm[i] - rstart; 598 for (j=0; j<n; j++) { 599 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 600 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 601 if (idxn[j] >= cstart && idxn[j] < cend) { 602 col = idxn[j] - cstart; 603 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 604 } else { 605 if (!aij->colmap) { 606 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 607 } 608 #if defined(PETSC_USE_CTABLE) 609 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 610 col--; 611 #else 612 col = aij->colmap[idxn[j]] - 1; 613 #endif 614 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 615 else { 616 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 617 } 618 } 619 } 620 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 621 } 622 PetscFunctionReturn(0); 623 } 624 625 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 626 627 #undef __FUNCT__ 628 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 629 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 630 { 631 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 632 PetscErrorCode ierr; 633 PetscInt nstash,reallocs; 634 InsertMode addv; 635 636 PetscFunctionBegin; 637 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 638 639 /* make sure all processors are either in INSERTMODE or ADDMODE */ 640 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 641 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 642 mat->insertmode = addv; /* in case this processor had no cache */ 643 644 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 645 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 646 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 647 PetscFunctionReturn(0); 648 } 649 650 #undef __FUNCT__ 651 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 652 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 653 { 654 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 655 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 656 PetscErrorCode ierr; 657 PetscMPIInt n; 658 PetscInt i,j,rstart,ncols,flg; 659 PetscInt *row,*col; 660 PetscBool other_disassembled; 661 PetscScalar *val; 662 InsertMode addv = mat->insertmode; 663 664 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 665 666 PetscFunctionBegin; 667 if (!aij->donotstash && !mat->nooffprocentries) { 668 while (1) { 669 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 670 if (!flg) break; 671 672 for (i=0; i<n; ) { 673 /* Now identify the consecutive vals belonging to the same row */ 674 for (j=i,rstart=row[j]; j<n; j++) { 675 if (row[j] != rstart) break; 676 } 677 if (j < n) ncols = j-i; 678 else ncols = n-i; 679 /* Now assemble all these values with a single function call */ 680 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 681 682 i = j; 683 } 684 } 685 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 686 } 687 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 688 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 689 690 /* determine if any processor has disassembled, if so we must 691 also disassemble ourselfs, in order that we may reassemble. */ 692 /* 693 if nonzero structure of submatrix B cannot change then we know that 694 no processor disassembled thus we can skip this stuff 695 */ 696 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 697 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 698 if (mat->was_assembled && !other_disassembled) { 699 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 700 } 701 } 702 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 703 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 704 } 705 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 706 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 707 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 708 709 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 710 711 aij->rowvalues = 0; 712 713 /* used by MatAXPY() */ 714 a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 715 a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 716 717 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 718 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 719 PetscFunctionReturn(0); 720 } 721 722 #undef __FUNCT__ 723 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 724 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 725 { 726 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 727 PetscErrorCode ierr; 728 729 PetscFunctionBegin; 730 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 731 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 732 PetscFunctionReturn(0); 733 } 734 735 #undef __FUNCT__ 736 #define __FUNCT__ "MatZeroRows_MPIAIJ" 737 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 738 { 739 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 740 PetscInt *owners = A->rmap->range; 741 PetscInt n = A->rmap->n; 742 PetscMPIInt size = mat->size; 743 PetscSF sf; 744 PetscInt *lrows; 745 PetscSFNode *rrows; 746 PetscInt lastidx = -1, r, p = 0, len = 0; 747 PetscErrorCode ierr; 748 749 PetscFunctionBegin; 750 /* Create SF where leaves are input rows and roots are owned rows */ 751 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 752 for (r = 0; r < n; ++r) lrows[r] = -1; 753 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 754 for (r = 0; r < N; ++r) { 755 const PetscInt idx = rows[r]; 756 PetscBool found = PETSC_FALSE; 757 /* Trick for efficient searching for sorted rows */ 758 if (lastidx > idx) p = 0; 759 lastidx = idx; 760 for (; p < size; ++p) { 761 if (idx >= owners[p] && idx < owners[p+1]) { 762 rrows[r].rank = p; 763 rrows[r].index = rows[r] - owners[p]; 764 found = PETSC_TRUE; 765 break; 766 } 767 } 768 if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx); 769 } 770 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 771 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 772 /* Collect flags for rows to be zeroed */ 773 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 774 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 775 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 776 /* Compress and put in row numbers */ 777 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 778 /* fix right hand side if needed */ 779 if (x && b) { 780 const PetscScalar *xx; 781 PetscScalar *bb; 782 783 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 784 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 785 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 786 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 787 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 788 } 789 /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/ 790 ierr = MatZeroRows(mat->B, len, lrows, 0.0, 0,0);CHKERRQ(ierr); 791 if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) { 792 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 793 } else if (diag != 0.0) { 794 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 795 if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 796 for (r = 0; r < len; ++r) { 797 const PetscInt row = lrows[r] + A->rmap->rstart; 798 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 799 } 800 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 801 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 802 } else { 803 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 804 } 805 ierr = PetscFree(lrows);CHKERRQ(ierr); 806 PetscFunctionReturn(0); 807 } 808 809 #undef __FUNCT__ 810 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 811 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 812 { 813 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 814 PetscErrorCode ierr; 815 PetscMPIInt size = l->size,n = A->rmap->n,lastidx = -1; 816 PetscInt i,j,r,m,p = 0,len = 0; 817 PetscInt *lrows,*owners = A->rmap->range; 818 PetscSFNode *rrows; 819 PetscSF sf; 820 const PetscScalar *xx; 821 PetscScalar *bb,*mask; 822 Vec xmask,lmask; 823 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 824 const PetscInt *aj, *ii,*ridx; 825 PetscScalar *aa; 826 #if defined(PETSC_DEBUG) 827 PetscBool found = PETSC_FALSE; 828 #endif 829 830 PetscFunctionBegin; 831 /* Create SF where leaves are input rows and roots are owned rows */ 832 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 833 for (r = 0; r < n; ++r) lrows[r] = -1; 834 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 835 for (r = 0; r < N; ++r) { 836 const PetscInt idx = rows[r]; 837 PetscBool found = PETSC_FALSE; 838 /* Trick for efficient searching for sorted rows */ 839 if (lastidx > idx) p = 0; 840 lastidx = idx; 841 for (; p < size; ++p) { 842 if (idx >= owners[p] && idx < owners[p+1]) { 843 rrows[r].rank = p; 844 rrows[r].index = rows[r] - owners[p]; 845 found = PETSC_TRUE; 846 break; 847 } 848 } 849 if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx); 850 } 851 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 852 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 853 /* Collect flags for rows to be zeroed */ 854 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 855 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 856 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 857 /* Compress and put in row numbers */ 858 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 859 /* zero diagonal part of matrix */ 860 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 861 /* handle off diagonal part of matrix */ 862 ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr); 863 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 864 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 865 for (i=0; i<len; i++) bb[lrows[i]] = 1; 866 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 867 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 868 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 869 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 870 if (x) { 871 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 872 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 873 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 874 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 875 } 876 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 877 /* remove zeroed rows of off diagonal matrix */ 878 ii = aij->i; 879 for (i=0; i<len; i++) { 880 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 881 } 882 /* loop over all elements of off process part of matrix zeroing removed columns*/ 883 if (aij->compressedrow.use) { 884 m = aij->compressedrow.nrows; 885 ii = aij->compressedrow.i; 886 ridx = aij->compressedrow.rindex; 887 for (i=0; i<m; i++) { 888 n = ii[i+1] - ii[i]; 889 aj = aij->j + ii[i]; 890 aa = aij->a + ii[i]; 891 892 for (j=0; j<n; j++) { 893 if (PetscAbsScalar(mask[*aj])) { 894 if (b) bb[*ridx] -= *aa*xx[*aj]; 895 *aa = 0.0; 896 } 897 aa++; 898 aj++; 899 } 900 ridx++; 901 } 902 } else { /* do not use compressed row format */ 903 m = l->B->rmap->n; 904 for (i=0; i<m; i++) { 905 n = ii[i+1] - ii[i]; 906 aj = aij->j + ii[i]; 907 aa = aij->a + ii[i]; 908 for (j=0; j<n; j++) { 909 if (PetscAbsScalar(mask[*aj])) { 910 if (b) bb[i] -= *aa*xx[*aj]; 911 *aa = 0.0; 912 } 913 aa++; 914 aj++; 915 } 916 } 917 } 918 if (x) { 919 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 920 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 921 } 922 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 923 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 924 ierr = PetscFree(lrows);CHKERRQ(ierr); 925 PetscFunctionReturn(0); 926 } 927 928 #undef __FUNCT__ 929 #define __FUNCT__ "MatMult_MPIAIJ" 930 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 931 { 932 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 933 PetscErrorCode ierr; 934 PetscInt nt; 935 936 PetscFunctionBegin; 937 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 938 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 939 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 940 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 941 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 942 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 943 PetscFunctionReturn(0); 944 } 945 946 #undef __FUNCT__ 947 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 948 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 949 { 950 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 951 PetscErrorCode ierr; 952 953 PetscFunctionBegin; 954 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 955 PetscFunctionReturn(0); 956 } 957 958 #undef __FUNCT__ 959 #define __FUNCT__ "MatMultAdd_MPIAIJ" 960 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 961 { 962 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 963 PetscErrorCode ierr; 964 965 PetscFunctionBegin; 966 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 967 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 968 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 969 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 970 PetscFunctionReturn(0); 971 } 972 973 #undef __FUNCT__ 974 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 975 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 976 { 977 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 978 PetscErrorCode ierr; 979 PetscBool merged; 980 981 PetscFunctionBegin; 982 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 983 /* do nondiagonal part */ 984 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 985 if (!merged) { 986 /* send it on its way */ 987 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 988 /* do local part */ 989 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 990 /* receive remote parts: note this assumes the values are not actually */ 991 /* added in yy until the next line, */ 992 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 993 } else { 994 /* do local part */ 995 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 996 /* send it on its way */ 997 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 998 /* values actually were received in the Begin() but we need to call this nop */ 999 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1000 } 1001 PetscFunctionReturn(0); 1002 } 1003 1004 #undef __FUNCT__ 1005 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1006 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1007 { 1008 MPI_Comm comm; 1009 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1010 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1011 IS Me,Notme; 1012 PetscErrorCode ierr; 1013 PetscInt M,N,first,last,*notme,i; 1014 PetscMPIInt size; 1015 1016 PetscFunctionBegin; 1017 /* Easy test: symmetric diagonal block */ 1018 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1019 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1020 if (!*f) PetscFunctionReturn(0); 1021 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1022 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1023 if (size == 1) PetscFunctionReturn(0); 1024 1025 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 1026 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1027 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1028 ierr = PetscMalloc1((N-last+first),¬me);CHKERRQ(ierr); 1029 for (i=0; i<first; i++) notme[i] = i; 1030 for (i=last; i<M; i++) notme[i-last+first] = i; 1031 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1032 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1033 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1034 Aoff = Aoffs[0]; 1035 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1036 Boff = Boffs[0]; 1037 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1038 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1039 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1040 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1041 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1042 ierr = PetscFree(notme);CHKERRQ(ierr); 1043 PetscFunctionReturn(0); 1044 } 1045 1046 #undef __FUNCT__ 1047 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1048 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1049 { 1050 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1051 PetscErrorCode ierr; 1052 1053 PetscFunctionBegin; 1054 /* do nondiagonal part */ 1055 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1056 /* send it on its way */ 1057 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1058 /* do local part */ 1059 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1060 /* receive remote parts */ 1061 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1062 PetscFunctionReturn(0); 1063 } 1064 1065 /* 1066 This only works correctly for square matrices where the subblock A->A is the 1067 diagonal block 1068 */ 1069 #undef __FUNCT__ 1070 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1071 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1072 { 1073 PetscErrorCode ierr; 1074 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1075 1076 PetscFunctionBegin; 1077 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1078 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1079 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1080 PetscFunctionReturn(0); 1081 } 1082 1083 #undef __FUNCT__ 1084 #define __FUNCT__ "MatScale_MPIAIJ" 1085 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1086 { 1087 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1088 PetscErrorCode ierr; 1089 1090 PetscFunctionBegin; 1091 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1092 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1093 PetscFunctionReturn(0); 1094 } 1095 1096 #undef __FUNCT__ 1097 #define __FUNCT__ "MatDestroy_MPIAIJ" 1098 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1099 { 1100 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1101 PetscErrorCode ierr; 1102 1103 PetscFunctionBegin; 1104 #if defined(PETSC_USE_LOG) 1105 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1106 #endif 1107 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1108 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1109 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1110 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1111 #if defined(PETSC_USE_CTABLE) 1112 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1113 #else 1114 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1115 #endif 1116 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1117 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1118 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1119 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1120 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1121 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1122 1123 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1124 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1125 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1126 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 1127 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1128 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1129 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1130 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1131 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1132 PetscFunctionReturn(0); 1133 } 1134 1135 #undef __FUNCT__ 1136 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1137 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1138 { 1139 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1140 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1141 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1142 PetscErrorCode ierr; 1143 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1144 int fd; 1145 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1146 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1147 PetscScalar *column_values; 1148 PetscInt message_count,flowcontrolcount; 1149 FILE *file; 1150 1151 PetscFunctionBegin; 1152 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1153 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1154 nz = A->nz + B->nz; 1155 if (!rank) { 1156 header[0] = MAT_FILE_CLASSID; 1157 header[1] = mat->rmap->N; 1158 header[2] = mat->cmap->N; 1159 1160 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1161 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1162 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1163 /* get largest number of rows any processor has */ 1164 rlen = mat->rmap->n; 1165 range = mat->rmap->range; 1166 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1167 } else { 1168 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1169 rlen = mat->rmap->n; 1170 } 1171 1172 /* load up the local row counts */ 1173 ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr); 1174 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1175 1176 /* store the row lengths to the file */ 1177 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1178 if (!rank) { 1179 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1180 for (i=1; i<size; i++) { 1181 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1182 rlen = range[i+1] - range[i]; 1183 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1184 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1185 } 1186 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1187 } else { 1188 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1189 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1190 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1191 } 1192 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1193 1194 /* load up the local column indices */ 1195 nzmax = nz; /* th processor needs space a largest processor needs */ 1196 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1197 ierr = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr); 1198 cnt = 0; 1199 for (i=0; i<mat->rmap->n; i++) { 1200 for (j=B->i[i]; j<B->i[i+1]; j++) { 1201 if ((col = garray[B->j[j]]) > cstart) break; 1202 column_indices[cnt++] = col; 1203 } 1204 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1205 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1206 } 1207 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1208 1209 /* store the column indices to the file */ 1210 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1211 if (!rank) { 1212 MPI_Status status; 1213 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1214 for (i=1; i<size; i++) { 1215 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1216 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1217 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1218 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1219 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1220 } 1221 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1222 } else { 1223 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1224 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1225 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1226 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1227 } 1228 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1229 1230 /* load up the local column values */ 1231 ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr); 1232 cnt = 0; 1233 for (i=0; i<mat->rmap->n; i++) { 1234 for (j=B->i[i]; j<B->i[i+1]; j++) { 1235 if (garray[B->j[j]] > cstart) break; 1236 column_values[cnt++] = B->a[j]; 1237 } 1238 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1239 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1240 } 1241 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1242 1243 /* store the column values to the file */ 1244 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1245 if (!rank) { 1246 MPI_Status status; 1247 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1248 for (i=1; i<size; i++) { 1249 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1250 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1251 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1252 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1253 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1254 } 1255 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1256 } else { 1257 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1258 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1259 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1260 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1261 } 1262 ierr = PetscFree(column_values);CHKERRQ(ierr); 1263 1264 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1265 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1266 PetscFunctionReturn(0); 1267 } 1268 1269 #include <petscdraw.h> 1270 #undef __FUNCT__ 1271 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1272 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1273 { 1274 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1275 PetscErrorCode ierr; 1276 PetscMPIInt rank = aij->rank,size = aij->size; 1277 PetscBool isdraw,iascii,isbinary; 1278 PetscViewer sviewer; 1279 PetscViewerFormat format; 1280 1281 PetscFunctionBegin; 1282 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1283 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1284 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1285 if (iascii) { 1286 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1287 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1288 MatInfo info; 1289 PetscBool inodes; 1290 1291 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1292 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1293 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1294 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1295 if (!inodes) { 1296 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1297 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1298 } else { 1299 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1300 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1301 } 1302 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1303 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1304 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1305 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1306 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1307 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1308 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1309 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1310 PetscFunctionReturn(0); 1311 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1312 PetscInt inodecount,inodelimit,*inodes; 1313 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1314 if (inodes) { 1315 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1316 } else { 1317 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1318 } 1319 PetscFunctionReturn(0); 1320 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1321 PetscFunctionReturn(0); 1322 } 1323 } else if (isbinary) { 1324 if (size == 1) { 1325 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1326 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1327 } else { 1328 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1329 } 1330 PetscFunctionReturn(0); 1331 } else if (isdraw) { 1332 PetscDraw draw; 1333 PetscBool isnull; 1334 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1335 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1336 } 1337 1338 if (size == 1) { 1339 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1340 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1341 } else { 1342 /* assemble the entire matrix onto first processor. */ 1343 Mat A; 1344 Mat_SeqAIJ *Aloc; 1345 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1346 MatScalar *a; 1347 1348 if (mat->rmap->N > 1024) { 1349 PetscBool flg = PETSC_FALSE; 1350 1351 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr); 1352 if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1353 } 1354 1355 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1356 if (!rank) { 1357 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1358 } else { 1359 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1360 } 1361 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1362 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1363 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1364 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1365 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1366 1367 /* copy over the A part */ 1368 Aloc = (Mat_SeqAIJ*)aij->A->data; 1369 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1370 row = mat->rmap->rstart; 1371 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1372 for (i=0; i<m; i++) { 1373 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1374 row++; 1375 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1376 } 1377 aj = Aloc->j; 1378 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1379 1380 /* copy over the B part */ 1381 Aloc = (Mat_SeqAIJ*)aij->B->data; 1382 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1383 row = mat->rmap->rstart; 1384 ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr); 1385 ct = cols; 1386 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1387 for (i=0; i<m; i++) { 1388 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1389 row++; 1390 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1391 } 1392 ierr = PetscFree(ct);CHKERRQ(ierr); 1393 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1394 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1395 /* 1396 Everyone has to call to draw the matrix since the graphics waits are 1397 synchronized across all processors that share the PetscDraw object 1398 */ 1399 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1400 if (!rank) { 1401 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1402 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1403 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1404 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1405 } 1406 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1407 ierr = MatDestroy(&A);CHKERRQ(ierr); 1408 } 1409 PetscFunctionReturn(0); 1410 } 1411 1412 #undef __FUNCT__ 1413 #define __FUNCT__ "MatView_MPIAIJ" 1414 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1415 { 1416 PetscErrorCode ierr; 1417 PetscBool iascii,isdraw,issocket,isbinary; 1418 1419 PetscFunctionBegin; 1420 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1421 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1422 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1423 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1424 if (iascii || isdraw || isbinary || issocket) { 1425 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1426 } 1427 PetscFunctionReturn(0); 1428 } 1429 1430 #undef __FUNCT__ 1431 #define __FUNCT__ "MatSOR_MPIAIJ" 1432 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1433 { 1434 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1435 PetscErrorCode ierr; 1436 Vec bb1 = 0; 1437 PetscBool hasop; 1438 1439 PetscFunctionBegin; 1440 if (flag == SOR_APPLY_UPPER) { 1441 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1442 PetscFunctionReturn(0); 1443 } 1444 1445 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1446 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1447 } 1448 1449 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1450 if (flag & SOR_ZERO_INITIAL_GUESS) { 1451 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1452 its--; 1453 } 1454 1455 while (its--) { 1456 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1457 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1458 1459 /* update rhs: bb1 = bb - B*x */ 1460 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1461 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1462 1463 /* local sweep */ 1464 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1465 } 1466 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1467 if (flag & SOR_ZERO_INITIAL_GUESS) { 1468 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1469 its--; 1470 } 1471 while (its--) { 1472 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1473 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1474 1475 /* update rhs: bb1 = bb - B*x */ 1476 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1477 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1478 1479 /* local sweep */ 1480 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1481 } 1482 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1483 if (flag & SOR_ZERO_INITIAL_GUESS) { 1484 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1485 its--; 1486 } 1487 while (its--) { 1488 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1489 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1490 1491 /* update rhs: bb1 = bb - B*x */ 1492 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1493 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1494 1495 /* local sweep */ 1496 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1497 } 1498 } else if (flag & SOR_EISENSTAT) { 1499 Vec xx1; 1500 1501 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1502 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1503 1504 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1505 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1506 if (!mat->diag) { 1507 ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1508 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1509 } 1510 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1511 if (hasop) { 1512 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1513 } else { 1514 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1515 } 1516 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1517 1518 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1519 1520 /* local sweep */ 1521 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1522 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1523 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1524 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1525 1526 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1527 PetscFunctionReturn(0); 1528 } 1529 1530 #undef __FUNCT__ 1531 #define __FUNCT__ "MatPermute_MPIAIJ" 1532 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1533 { 1534 Mat aA,aB,Aperm; 1535 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1536 PetscScalar *aa,*ba; 1537 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1538 PetscSF rowsf,sf; 1539 IS parcolp = NULL; 1540 PetscBool done; 1541 PetscErrorCode ierr; 1542 1543 PetscFunctionBegin; 1544 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1545 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1546 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1547 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1548 1549 /* Invert row permutation to find out where my rows should go */ 1550 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1551 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1552 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1553 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1554 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1555 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1556 1557 /* Invert column permutation to find out where my columns should go */ 1558 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1559 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1560 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1561 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1562 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1563 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1564 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1565 1566 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1567 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1568 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1569 1570 /* Find out where my gcols should go */ 1571 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1572 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1573 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1574 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1575 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1576 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1577 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1578 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1579 1580 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1581 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1582 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1583 for (i=0; i<m; i++) { 1584 PetscInt row = rdest[i],rowner; 1585 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1586 for (j=ai[i]; j<ai[i+1]; j++) { 1587 PetscInt cowner,col = cdest[aj[j]]; 1588 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1589 if (rowner == cowner) dnnz[i]++; 1590 else onnz[i]++; 1591 } 1592 for (j=bi[i]; j<bi[i+1]; j++) { 1593 PetscInt cowner,col = gcdest[bj[j]]; 1594 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1595 if (rowner == cowner) dnnz[i]++; 1596 else onnz[i]++; 1597 } 1598 } 1599 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1600 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1601 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1602 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1603 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1604 1605 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1606 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1607 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1608 for (i=0; i<m; i++) { 1609 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1610 PetscInt rowlen; 1611 rowlen = ai[i+1] - ai[i]; 1612 for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]]; 1613 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr); 1614 rowlen = bi[i+1] - bi[i]; 1615 for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]]; 1616 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr); 1617 } 1618 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1619 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1620 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1621 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1622 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1623 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1624 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1625 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1626 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1627 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1628 *B = Aperm; 1629 PetscFunctionReturn(0); 1630 } 1631 1632 #undef __FUNCT__ 1633 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1634 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1635 { 1636 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1637 Mat A = mat->A,B = mat->B; 1638 PetscErrorCode ierr; 1639 PetscReal isend[5],irecv[5]; 1640 1641 PetscFunctionBegin; 1642 info->block_size = 1.0; 1643 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1644 1645 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1646 isend[3] = info->memory; isend[4] = info->mallocs; 1647 1648 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1649 1650 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1651 isend[3] += info->memory; isend[4] += info->mallocs; 1652 if (flag == MAT_LOCAL) { 1653 info->nz_used = isend[0]; 1654 info->nz_allocated = isend[1]; 1655 info->nz_unneeded = isend[2]; 1656 info->memory = isend[3]; 1657 info->mallocs = isend[4]; 1658 } else if (flag == MAT_GLOBAL_MAX) { 1659 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1660 1661 info->nz_used = irecv[0]; 1662 info->nz_allocated = irecv[1]; 1663 info->nz_unneeded = irecv[2]; 1664 info->memory = irecv[3]; 1665 info->mallocs = irecv[4]; 1666 } else if (flag == MAT_GLOBAL_SUM) { 1667 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1668 1669 info->nz_used = irecv[0]; 1670 info->nz_allocated = irecv[1]; 1671 info->nz_unneeded = irecv[2]; 1672 info->memory = irecv[3]; 1673 info->mallocs = irecv[4]; 1674 } 1675 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1676 info->fill_ratio_needed = 0; 1677 info->factor_mallocs = 0; 1678 PetscFunctionReturn(0); 1679 } 1680 1681 #undef __FUNCT__ 1682 #define __FUNCT__ "MatSetOption_MPIAIJ" 1683 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1684 { 1685 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1686 PetscErrorCode ierr; 1687 1688 PetscFunctionBegin; 1689 switch (op) { 1690 case MAT_NEW_NONZERO_LOCATIONS: 1691 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1692 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1693 case MAT_KEEP_NONZERO_PATTERN: 1694 case MAT_NEW_NONZERO_LOCATION_ERR: 1695 case MAT_USE_INODES: 1696 case MAT_IGNORE_ZERO_ENTRIES: 1697 MatCheckPreallocated(A,1); 1698 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1699 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1700 break; 1701 case MAT_ROW_ORIENTED: 1702 a->roworiented = flg; 1703 1704 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1705 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1706 break; 1707 case MAT_NEW_DIAGONALS: 1708 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1709 break; 1710 case MAT_IGNORE_OFF_PROC_ENTRIES: 1711 a->donotstash = flg; 1712 break; 1713 case MAT_SPD: 1714 A->spd_set = PETSC_TRUE; 1715 A->spd = flg; 1716 if (flg) { 1717 A->symmetric = PETSC_TRUE; 1718 A->structurally_symmetric = PETSC_TRUE; 1719 A->symmetric_set = PETSC_TRUE; 1720 A->structurally_symmetric_set = PETSC_TRUE; 1721 } 1722 break; 1723 case MAT_SYMMETRIC: 1724 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1725 break; 1726 case MAT_STRUCTURALLY_SYMMETRIC: 1727 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1728 break; 1729 case MAT_HERMITIAN: 1730 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1731 break; 1732 case MAT_SYMMETRY_ETERNAL: 1733 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1734 break; 1735 default: 1736 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1737 } 1738 PetscFunctionReturn(0); 1739 } 1740 1741 #undef __FUNCT__ 1742 #define __FUNCT__ "MatGetRow_MPIAIJ" 1743 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1744 { 1745 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1746 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1747 PetscErrorCode ierr; 1748 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1749 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1750 PetscInt *cmap,*idx_p; 1751 1752 PetscFunctionBegin; 1753 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1754 mat->getrowactive = PETSC_TRUE; 1755 1756 if (!mat->rowvalues && (idx || v)) { 1757 /* 1758 allocate enough space to hold information from the longest row. 1759 */ 1760 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1761 PetscInt max = 1,tmp; 1762 for (i=0; i<matin->rmap->n; i++) { 1763 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1764 if (max < tmp) max = tmp; 1765 } 1766 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1767 } 1768 1769 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1770 lrow = row - rstart; 1771 1772 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1773 if (!v) {pvA = 0; pvB = 0;} 1774 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1775 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1776 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1777 nztot = nzA + nzB; 1778 1779 cmap = mat->garray; 1780 if (v || idx) { 1781 if (nztot) { 1782 /* Sort by increasing column numbers, assuming A and B already sorted */ 1783 PetscInt imark = -1; 1784 if (v) { 1785 *v = v_p = mat->rowvalues; 1786 for (i=0; i<nzB; i++) { 1787 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1788 else break; 1789 } 1790 imark = i; 1791 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1792 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1793 } 1794 if (idx) { 1795 *idx = idx_p = mat->rowindices; 1796 if (imark > -1) { 1797 for (i=0; i<imark; i++) { 1798 idx_p[i] = cmap[cworkB[i]]; 1799 } 1800 } else { 1801 for (i=0; i<nzB; i++) { 1802 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1803 else break; 1804 } 1805 imark = i; 1806 } 1807 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1808 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1809 } 1810 } else { 1811 if (idx) *idx = 0; 1812 if (v) *v = 0; 1813 } 1814 } 1815 *nz = nztot; 1816 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1817 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1818 PetscFunctionReturn(0); 1819 } 1820 1821 #undef __FUNCT__ 1822 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1823 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1824 { 1825 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1826 1827 PetscFunctionBegin; 1828 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1829 aij->getrowactive = PETSC_FALSE; 1830 PetscFunctionReturn(0); 1831 } 1832 1833 #undef __FUNCT__ 1834 #define __FUNCT__ "MatNorm_MPIAIJ" 1835 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1836 { 1837 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1838 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1839 PetscErrorCode ierr; 1840 PetscInt i,j,cstart = mat->cmap->rstart; 1841 PetscReal sum = 0.0; 1842 MatScalar *v; 1843 1844 PetscFunctionBegin; 1845 if (aij->size == 1) { 1846 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1847 } else { 1848 if (type == NORM_FROBENIUS) { 1849 v = amat->a; 1850 for (i=0; i<amat->nz; i++) { 1851 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1852 } 1853 v = bmat->a; 1854 for (i=0; i<bmat->nz; i++) { 1855 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1856 } 1857 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1858 *norm = PetscSqrtReal(*norm); 1859 } else if (type == NORM_1) { /* max column norm */ 1860 PetscReal *tmp,*tmp2; 1861 PetscInt *jj,*garray = aij->garray; 1862 ierr = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr); 1863 ierr = PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr); 1864 *norm = 0.0; 1865 v = amat->a; jj = amat->j; 1866 for (j=0; j<amat->nz; j++) { 1867 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1868 } 1869 v = bmat->a; jj = bmat->j; 1870 for (j=0; j<bmat->nz; j++) { 1871 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1872 } 1873 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1874 for (j=0; j<mat->cmap->N; j++) { 1875 if (tmp2[j] > *norm) *norm = tmp2[j]; 1876 } 1877 ierr = PetscFree(tmp);CHKERRQ(ierr); 1878 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1879 } else if (type == NORM_INFINITY) { /* max row norm */ 1880 PetscReal ntemp = 0.0; 1881 for (j=0; j<aij->A->rmap->n; j++) { 1882 v = amat->a + amat->i[j]; 1883 sum = 0.0; 1884 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1885 sum += PetscAbsScalar(*v); v++; 1886 } 1887 v = bmat->a + bmat->i[j]; 1888 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1889 sum += PetscAbsScalar(*v); v++; 1890 } 1891 if (sum > ntemp) ntemp = sum; 1892 } 1893 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1894 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1895 } 1896 PetscFunctionReturn(0); 1897 } 1898 1899 #undef __FUNCT__ 1900 #define __FUNCT__ "MatTranspose_MPIAIJ" 1901 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1902 { 1903 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1904 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1905 PetscErrorCode ierr; 1906 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1907 PetscInt cstart = A->cmap->rstart,ncol; 1908 Mat B; 1909 MatScalar *array; 1910 1911 PetscFunctionBegin; 1912 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1913 1914 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1915 ai = Aloc->i; aj = Aloc->j; 1916 bi = Bloc->i; bj = Bloc->j; 1917 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1918 PetscInt *d_nnz,*g_nnz,*o_nnz; 1919 PetscSFNode *oloc; 1920 PETSC_UNUSED PetscSF sf; 1921 1922 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1923 /* compute d_nnz for preallocation */ 1924 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1925 for (i=0; i<ai[ma]; i++) { 1926 d_nnz[aj[i]]++; 1927 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1928 } 1929 /* compute local off-diagonal contributions */ 1930 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 1931 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1932 /* map those to global */ 1933 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1934 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1935 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1936 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1937 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1938 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1939 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1940 1941 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1942 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1943 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1944 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1945 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1946 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 1947 } else { 1948 B = *matout; 1949 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1950 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1951 } 1952 1953 /* copy over the A part */ 1954 array = Aloc->a; 1955 row = A->rmap->rstart; 1956 for (i=0; i<ma; i++) { 1957 ncol = ai[i+1]-ai[i]; 1958 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1959 row++; 1960 array += ncol; aj += ncol; 1961 } 1962 aj = Aloc->j; 1963 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1964 1965 /* copy over the B part */ 1966 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 1967 array = Bloc->a; 1968 row = A->rmap->rstart; 1969 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1970 cols_tmp = cols; 1971 for (i=0; i<mb; i++) { 1972 ncol = bi[i+1]-bi[i]; 1973 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1974 row++; 1975 array += ncol; cols_tmp += ncol; 1976 } 1977 ierr = PetscFree(cols);CHKERRQ(ierr); 1978 1979 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1980 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1981 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1982 *matout = B; 1983 } else { 1984 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1985 } 1986 PetscFunctionReturn(0); 1987 } 1988 1989 #undef __FUNCT__ 1990 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1991 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1992 { 1993 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1994 Mat a = aij->A,b = aij->B; 1995 PetscErrorCode ierr; 1996 PetscInt s1,s2,s3; 1997 1998 PetscFunctionBegin; 1999 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2000 if (rr) { 2001 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2002 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2003 /* Overlap communication with computation. */ 2004 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2005 } 2006 if (ll) { 2007 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2008 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2009 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2010 } 2011 /* scale the diagonal block */ 2012 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2013 2014 if (rr) { 2015 /* Do a scatter end and then right scale the off-diagonal block */ 2016 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2017 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2018 } 2019 PetscFunctionReturn(0); 2020 } 2021 2022 #undef __FUNCT__ 2023 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2024 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2025 { 2026 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2027 PetscErrorCode ierr; 2028 2029 PetscFunctionBegin; 2030 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2031 PetscFunctionReturn(0); 2032 } 2033 2034 #undef __FUNCT__ 2035 #define __FUNCT__ "MatEqual_MPIAIJ" 2036 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2037 { 2038 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2039 Mat a,b,c,d; 2040 PetscBool flg; 2041 PetscErrorCode ierr; 2042 2043 PetscFunctionBegin; 2044 a = matA->A; b = matA->B; 2045 c = matB->A; d = matB->B; 2046 2047 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2048 if (flg) { 2049 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2050 } 2051 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2052 PetscFunctionReturn(0); 2053 } 2054 2055 #undef __FUNCT__ 2056 #define __FUNCT__ "MatCopy_MPIAIJ" 2057 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2058 { 2059 PetscErrorCode ierr; 2060 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2061 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2062 2063 PetscFunctionBegin; 2064 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2065 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2066 /* because of the column compression in the off-processor part of the matrix a->B, 2067 the number of columns in a->B and b->B may be different, hence we cannot call 2068 the MatCopy() directly on the two parts. If need be, we can provide a more 2069 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2070 then copying the submatrices */ 2071 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2072 } else { 2073 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2074 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2075 } 2076 PetscFunctionReturn(0); 2077 } 2078 2079 #undef __FUNCT__ 2080 #define __FUNCT__ "MatSetUp_MPIAIJ" 2081 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2082 { 2083 PetscErrorCode ierr; 2084 2085 PetscFunctionBegin; 2086 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2087 PetscFunctionReturn(0); 2088 } 2089 2090 #undef __FUNCT__ 2091 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2092 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2093 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2094 { 2095 PetscInt i,m=Y->rmap->N; 2096 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2097 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2098 const PetscInt *xi = x->i,*yi = y->i; 2099 2100 PetscFunctionBegin; 2101 /* Set the number of nonzeros in the new matrix */ 2102 for (i=0; i<m; i++) { 2103 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2104 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2105 nnz[i] = 0; 2106 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2107 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2108 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2109 nnz[i]++; 2110 } 2111 for (; k<nzy; k++) nnz[i]++; 2112 } 2113 PetscFunctionReturn(0); 2114 } 2115 2116 #undef __FUNCT__ 2117 #define __FUNCT__ "MatAXPY_MPIAIJ" 2118 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2119 { 2120 PetscErrorCode ierr; 2121 PetscInt i; 2122 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2123 PetscBLASInt bnz,one=1; 2124 Mat_SeqAIJ *x,*y; 2125 2126 PetscFunctionBegin; 2127 if (str == SAME_NONZERO_PATTERN) { 2128 PetscScalar alpha = a; 2129 x = (Mat_SeqAIJ*)xx->A->data; 2130 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2131 y = (Mat_SeqAIJ*)yy->A->data; 2132 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2133 x = (Mat_SeqAIJ*)xx->B->data; 2134 y = (Mat_SeqAIJ*)yy->B->data; 2135 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2136 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2137 } else if (str == SUBSET_NONZERO_PATTERN) { 2138 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2139 2140 x = (Mat_SeqAIJ*)xx->B->data; 2141 y = (Mat_SeqAIJ*)yy->B->data; 2142 if (y->xtoy && y->XtoY != xx->B) { 2143 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2144 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2145 } 2146 if (!y->xtoy) { /* get xtoy */ 2147 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2148 y->XtoY = xx->B; 2149 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2150 } 2151 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2152 } else { 2153 Mat B; 2154 PetscInt *nnz_d,*nnz_o; 2155 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2156 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2157 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2158 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2159 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2160 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2161 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2162 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2163 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2164 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2165 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2166 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2167 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2168 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2169 } 2170 PetscFunctionReturn(0); 2171 } 2172 2173 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2174 2175 #undef __FUNCT__ 2176 #define __FUNCT__ "MatConjugate_MPIAIJ" 2177 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2178 { 2179 #if defined(PETSC_USE_COMPLEX) 2180 PetscErrorCode ierr; 2181 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2182 2183 PetscFunctionBegin; 2184 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2185 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2186 #else 2187 PetscFunctionBegin; 2188 #endif 2189 PetscFunctionReturn(0); 2190 } 2191 2192 #undef __FUNCT__ 2193 #define __FUNCT__ "MatRealPart_MPIAIJ" 2194 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2195 { 2196 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2197 PetscErrorCode ierr; 2198 2199 PetscFunctionBegin; 2200 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2201 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2202 PetscFunctionReturn(0); 2203 } 2204 2205 #undef __FUNCT__ 2206 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2207 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2208 { 2209 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2210 PetscErrorCode ierr; 2211 2212 PetscFunctionBegin; 2213 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2214 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2215 PetscFunctionReturn(0); 2216 } 2217 2218 #if defined(PETSC_HAVE_PBGL) 2219 2220 #include <boost/parallel/mpi/bsp_process_group.hpp> 2221 #include <boost/graph/distributed/ilu_default_graph.hpp> 2222 #include <boost/graph/distributed/ilu_0_block.hpp> 2223 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2224 #include <boost/graph/distributed/petsc/interface.hpp> 2225 #include <boost/multi_array.hpp> 2226 #include <boost/parallel/distributed_property_map->hpp> 2227 2228 #undef __FUNCT__ 2229 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2230 /* 2231 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2232 */ 2233 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2234 { 2235 namespace petsc = boost::distributed::petsc; 2236 2237 namespace graph_dist = boost::graph::distributed; 2238 using boost::graph::distributed::ilu_default::process_group_type; 2239 using boost::graph::ilu_permuted; 2240 2241 PetscBool row_identity, col_identity; 2242 PetscContainer c; 2243 PetscInt m, n, M, N; 2244 PetscErrorCode ierr; 2245 2246 PetscFunctionBegin; 2247 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2248 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2249 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2250 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2251 2252 process_group_type pg; 2253 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2254 lgraph_type *lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2255 lgraph_type& level_graph = *lgraph_p; 2256 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2257 2258 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2259 ilu_permuted(level_graph); 2260 2261 /* put together the new matrix */ 2262 ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr); 2263 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2264 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2265 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2266 ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr); 2267 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2268 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2269 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2270 2271 ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c); 2272 ierr = PetscContainerSetPointer(c, lgraph_p); 2273 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2274 ierr = PetscContainerDestroy(&c); 2275 PetscFunctionReturn(0); 2276 } 2277 2278 #undef __FUNCT__ 2279 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2280 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2281 { 2282 PetscFunctionBegin; 2283 PetscFunctionReturn(0); 2284 } 2285 2286 #undef __FUNCT__ 2287 #define __FUNCT__ "MatSolve_MPIAIJ" 2288 /* 2289 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2290 */ 2291 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2292 { 2293 namespace graph_dist = boost::graph::distributed; 2294 2295 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2296 lgraph_type *lgraph_p; 2297 PetscContainer c; 2298 PetscErrorCode ierr; 2299 2300 PetscFunctionBegin; 2301 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr); 2302 ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr); 2303 ierr = VecCopy(b, x);CHKERRQ(ierr); 2304 2305 PetscScalar *array_x; 2306 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2307 PetscInt sx; 2308 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2309 2310 PetscScalar *array_b; 2311 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2312 PetscInt sb; 2313 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2314 2315 lgraph_type& level_graph = *lgraph_p; 2316 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2317 2318 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2319 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]); 2320 array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]); 2321 2322 typedef boost::iterator_property_map<array_ref_type::iterator, 2323 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2324 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)); 2325 gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2326 2327 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2328 PetscFunctionReturn(0); 2329 } 2330 #endif 2331 2332 #undef __FUNCT__ 2333 #define __FUNCT__ "MatDestroy_MatRedundant" 2334 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2335 { 2336 PetscErrorCode ierr; 2337 Mat_Redundant *redund; 2338 PetscInt i; 2339 PetscMPIInt size; 2340 2341 PetscFunctionBegin; 2342 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2343 if (size == 1) { 2344 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2345 redund = a->redundant; 2346 } else { 2347 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2348 redund = a->redundant; 2349 } 2350 if (redund){ 2351 if (redund->matseq) { /* via MatGetSubMatrices() */ 2352 ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr); 2353 ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr); 2354 ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr); 2355 ierr = PetscFree(redund->matseq);CHKERRQ(ierr); 2356 } else { 2357 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2358 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2359 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2360 for (i=0; i<redund->nrecvs; i++) { 2361 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2362 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2363 } 2364 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2365 } 2366 2367 if (redund->psubcomm) { 2368 ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr); 2369 } 2370 ierr = redund->Destroy(A);CHKERRQ(ierr); 2371 ierr = PetscFree(redund);CHKERRQ(ierr); 2372 } 2373 PetscFunctionReturn(0); 2374 } 2375 2376 #undef __FUNCT__ 2377 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced" 2378 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 2379 { 2380 PetscMPIInt rank,size; 2381 MPI_Comm comm; 2382 PetscErrorCode ierr; 2383 PetscInt nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N; 2384 PetscMPIInt *send_rank= NULL,*recv_rank=NULL,subrank,subsize; 2385 PetscInt *rowrange = mat->rmap->range; 2386 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2387 Mat A = aij->A,B=aij->B,C=*matredundant; 2388 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2389 PetscScalar *sbuf_a; 2390 PetscInt nzlocal=a->nz+b->nz; 2391 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2392 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray; 2393 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2394 MatScalar *aworkA,*aworkB; 2395 PetscScalar *vals; 2396 PetscMPIInt tag1,tag2,tag3,imdex; 2397 MPI_Request *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL; 2398 MPI_Request *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL; 2399 MPI_Status recv_status,*send_status; 2400 PetscInt *sbuf_nz=NULL,*rbuf_nz=NULL,count; 2401 PetscInt **rbuf_j=NULL; 2402 PetscScalar **rbuf_a=NULL; 2403 Mat_Redundant *redund =NULL; 2404 2405 PetscFunctionBegin; 2406 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2407 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2408 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2409 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2410 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2411 2412 if (reuse == MAT_REUSE_MATRIX) { 2413 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2414 if (subsize == 1) { 2415 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2416 redund = c->redundant; 2417 } else { 2418 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2419 redund = c->redundant; 2420 } 2421 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2422 2423 nsends = redund->nsends; 2424 nrecvs = redund->nrecvs; 2425 send_rank = redund->send_rank; 2426 recv_rank = redund->recv_rank; 2427 sbuf_nz = redund->sbuf_nz; 2428 rbuf_nz = redund->rbuf_nz; 2429 sbuf_j = redund->sbuf_j; 2430 sbuf_a = redund->sbuf_a; 2431 rbuf_j = redund->rbuf_j; 2432 rbuf_a = redund->rbuf_a; 2433 } 2434 2435 if (reuse == MAT_INITIAL_MATRIX) { 2436 PetscInt nleftover,np_subcomm; 2437 2438 /* get the destination processors' id send_rank, nsends and nrecvs */ 2439 ierr = PetscMalloc2(size,&send_rank,size,&recv_rank);CHKERRQ(ierr); 2440 2441 np_subcomm = size/nsubcomm; 2442 nleftover = size - nsubcomm*np_subcomm; 2443 2444 /* block of codes below is specific for INTERLACED */ 2445 /* ------------------------------------------------*/ 2446 nsends = 0; nrecvs = 0; 2447 for (i=0; i<size; i++) { 2448 if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */ 2449 send_rank[nsends++] = i; 2450 recv_rank[nrecvs++] = i; 2451 } 2452 } 2453 if (rank >= size - nleftover) { /* this proc is a leftover processor */ 2454 i = size-nleftover-1; 2455 j = 0; 2456 while (j < nsubcomm - nleftover) { 2457 send_rank[nsends++] = i; 2458 i--; j++; 2459 } 2460 } 2461 2462 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */ 2463 for (i=0; i<nleftover; i++) { 2464 recv_rank[nrecvs++] = size-nleftover+i; 2465 } 2466 } 2467 /*----------------------------------------------*/ 2468 2469 /* allocate sbuf_j, sbuf_a */ 2470 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2471 ierr = PetscMalloc1(i,&sbuf_j);CHKERRQ(ierr); 2472 ierr = PetscMalloc1((nzlocal+1),&sbuf_a);CHKERRQ(ierr); 2473 /* 2474 ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr); 2475 ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr); 2476 */ 2477 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2478 2479 /* copy mat's local entries into the buffers */ 2480 if (reuse == MAT_INITIAL_MATRIX) { 2481 rownz_max = 0; 2482 rptr = sbuf_j; 2483 cols = sbuf_j + rend-rstart + 1; 2484 vals = sbuf_a; 2485 rptr[0] = 0; 2486 for (i=0; i<rend-rstart; i++) { 2487 row = i + rstart; 2488 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2489 ncols = nzA + nzB; 2490 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2491 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2492 /* load the column indices for this row into cols */ 2493 lwrite = 0; 2494 for (l=0; l<nzB; l++) { 2495 if ((ctmp = bmap[cworkB[l]]) < cstart) { 2496 vals[lwrite] = aworkB[l]; 2497 cols[lwrite++] = ctmp; 2498 } 2499 } 2500 for (l=0; l<nzA; l++) { 2501 vals[lwrite] = aworkA[l]; 2502 cols[lwrite++] = cstart + cworkA[l]; 2503 } 2504 for (l=0; l<nzB; l++) { 2505 if ((ctmp = bmap[cworkB[l]]) >= cend) { 2506 vals[lwrite] = aworkB[l]; 2507 cols[lwrite++] = ctmp; 2508 } 2509 } 2510 vals += ncols; 2511 cols += ncols; 2512 rptr[i+1] = rptr[i] + ncols; 2513 if (rownz_max < ncols) rownz_max = ncols; 2514 } 2515 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2516 } else { /* only copy matrix values into sbuf_a */ 2517 rptr = sbuf_j; 2518 vals = sbuf_a; 2519 rptr[0] = 0; 2520 for (i=0; i<rend-rstart; i++) { 2521 row = i + rstart; 2522 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2523 ncols = nzA + nzB; 2524 cworkB = b->j + b->i[i]; 2525 aworkA = a->a + a->i[i]; 2526 aworkB = b->a + b->i[i]; 2527 lwrite = 0; 2528 for (l=0; l<nzB; l++) { 2529 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2530 } 2531 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2532 for (l=0; l<nzB; l++) { 2533 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2534 } 2535 vals += ncols; 2536 rptr[i+1] = rptr[i] + ncols; 2537 } 2538 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2539 2540 /* send nzlocal to others, and recv other's nzlocal */ 2541 /*--------------------------------------------------*/ 2542 if (reuse == MAT_INITIAL_MATRIX) { 2543 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr); 2544 2545 s_waits2 = s_waits3 + nsends; 2546 s_waits1 = s_waits2 + nsends; 2547 r_waits1 = s_waits1 + nsends; 2548 r_waits2 = r_waits1 + nrecvs; 2549 r_waits3 = r_waits2 + nrecvs; 2550 } else { 2551 ierr = PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr); 2552 2553 r_waits3 = s_waits3 + nsends; 2554 } 2555 2556 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2557 if (reuse == MAT_INITIAL_MATRIX) { 2558 /* get new tags to keep the communication clean */ 2559 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2560 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2561 ierr = PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);CHKERRQ(ierr); 2562 2563 /* post receives of other's nzlocal */ 2564 for (i=0; i<nrecvs; i++) { 2565 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2566 } 2567 /* send nzlocal to others */ 2568 for (i=0; i<nsends; i++) { 2569 sbuf_nz[i] = nzlocal; 2570 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2571 } 2572 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2573 count = nrecvs; 2574 while (count) { 2575 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2576 2577 recv_rank[imdex] = recv_status.MPI_SOURCE; 2578 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2579 ierr = PetscMalloc1((rbuf_nz[imdex]+1),&rbuf_a[imdex]);CHKERRQ(ierr); 2580 2581 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2582 2583 rbuf_nz[imdex] += i + 2; 2584 2585 ierr = PetscMalloc1(rbuf_nz[imdex],&rbuf_j[imdex]);CHKERRQ(ierr); 2586 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2587 count--; 2588 } 2589 /* wait on sends of nzlocal */ 2590 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2591 /* send mat->i,j to others, and recv from other's */ 2592 /*------------------------------------------------*/ 2593 for (i=0; i<nsends; i++) { 2594 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2595 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2596 } 2597 /* wait on receives of mat->i,j */ 2598 /*------------------------------*/ 2599 count = nrecvs; 2600 while (count) { 2601 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2602 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2603 count--; 2604 } 2605 /* wait on sends of mat->i,j */ 2606 /*---------------------------*/ 2607 if (nsends) { 2608 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2609 } 2610 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2611 2612 /* post receives, send and receive mat->a */ 2613 /*----------------------------------------*/ 2614 for (imdex=0; imdex<nrecvs; imdex++) { 2615 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2616 } 2617 for (i=0; i<nsends; i++) { 2618 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2619 } 2620 count = nrecvs; 2621 while (count) { 2622 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2623 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2624 count--; 2625 } 2626 if (nsends) { 2627 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2628 } 2629 2630 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2631 2632 /* create redundant matrix */ 2633 /*-------------------------*/ 2634 if (reuse == MAT_INITIAL_MATRIX) { 2635 const PetscInt *range; 2636 PetscInt rstart_sub,rend_sub,mloc_sub; 2637 2638 /* compute rownz_max for preallocation */ 2639 for (imdex=0; imdex<nrecvs; imdex++) { 2640 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2641 rptr = rbuf_j[imdex]; 2642 for (i=0; i<j; i++) { 2643 ncols = rptr[i+1] - rptr[i]; 2644 if (rownz_max < ncols) rownz_max = ncols; 2645 } 2646 } 2647 2648 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2649 2650 /* get local size of redundant matrix 2651 - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED, works for other types, but may not efficient! */ 2652 ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr); 2653 rstart_sub = range[nsubcomm*subrank]; 2654 if (subrank+1 < subsize) { /* not the last proc in subcomm */ 2655 rend_sub = range[nsubcomm*(subrank+1)]; 2656 } else { 2657 rend_sub = mat->rmap->N; 2658 } 2659 mloc_sub = rend_sub - rstart_sub; 2660 2661 if (M == N) { 2662 ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2663 } else { /* non-square matrix */ 2664 ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr); 2665 } 2666 ierr = MatSetBlockSizesFromMats(C,mat,mat);CHKERRQ(ierr); 2667 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2668 ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr); 2669 ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr); 2670 } else { 2671 C = *matredundant; 2672 } 2673 2674 /* insert local matrix entries */ 2675 rptr = sbuf_j; 2676 cols = sbuf_j + rend-rstart + 1; 2677 vals = sbuf_a; 2678 for (i=0; i<rend-rstart; i++) { 2679 row = i + rstart; 2680 ncols = rptr[i+1] - rptr[i]; 2681 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2682 vals += ncols; 2683 cols += ncols; 2684 } 2685 /* insert received matrix entries */ 2686 for (imdex=0; imdex<nrecvs; imdex++) { 2687 rstart = rowrange[recv_rank[imdex]]; 2688 rend = rowrange[recv_rank[imdex]+1]; 2689 /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */ 2690 rptr = rbuf_j[imdex]; 2691 cols = rbuf_j[imdex] + rend-rstart + 1; 2692 vals = rbuf_a[imdex]; 2693 for (i=0; i<rend-rstart; i++) { 2694 row = i + rstart; 2695 ncols = rptr[i+1] - rptr[i]; 2696 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2697 vals += ncols; 2698 cols += ncols; 2699 } 2700 } 2701 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2702 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2703 2704 if (reuse == MAT_INITIAL_MATRIX) { 2705 *matredundant = C; 2706 2707 /* create a supporting struct and attach it to C for reuse */ 2708 ierr = PetscNewLog(C,&redund);CHKERRQ(ierr); 2709 if (subsize == 1) { 2710 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2711 c->redundant = redund; 2712 } else { 2713 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2714 c->redundant = redund; 2715 } 2716 2717 redund->nzlocal = nzlocal; 2718 redund->nsends = nsends; 2719 redund->nrecvs = nrecvs; 2720 redund->send_rank = send_rank; 2721 redund->recv_rank = recv_rank; 2722 redund->sbuf_nz = sbuf_nz; 2723 redund->rbuf_nz = rbuf_nz; 2724 redund->sbuf_j = sbuf_j; 2725 redund->sbuf_a = sbuf_a; 2726 redund->rbuf_j = rbuf_j; 2727 redund->rbuf_a = rbuf_a; 2728 redund->psubcomm = NULL; 2729 2730 redund->Destroy = C->ops->destroy; 2731 C->ops->destroy = MatDestroy_MatRedundant; 2732 } 2733 PetscFunctionReturn(0); 2734 } 2735 2736 #undef __FUNCT__ 2737 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2738 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 2739 { 2740 PetscErrorCode ierr; 2741 MPI_Comm comm; 2742 PetscMPIInt size,subsize; 2743 PetscInt mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N; 2744 Mat_Redundant *redund=NULL; 2745 PetscSubcomm psubcomm=NULL; 2746 MPI_Comm subcomm_in=subcomm; 2747 Mat *matseq; 2748 IS isrow,iscol; 2749 2750 PetscFunctionBegin; 2751 if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */ 2752 if (reuse == MAT_INITIAL_MATRIX) { 2753 /* create psubcomm, then get subcomm */ 2754 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2755 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2756 if (nsubcomm < 1 || nsubcomm > size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must between 1 and %D",size); 2757 2758 ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr); 2759 ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr); 2760 ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr); 2761 ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr); 2762 subcomm = psubcomm->comm; 2763 } else { /* retrieve psubcomm and subcomm */ 2764 ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr); 2765 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2766 if (subsize == 1) { 2767 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2768 redund = c->redundant; 2769 } else { 2770 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2771 redund = c->redundant; 2772 } 2773 psubcomm = redund->psubcomm; 2774 } 2775 if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) { 2776 ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr); 2777 if (reuse == MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_MatRedundant() */ 2778 ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr); 2779 if (subsize == 1) { 2780 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2781 c->redundant->psubcomm = psubcomm; 2782 } else { 2783 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2784 c->redundant->psubcomm = psubcomm ; 2785 } 2786 } 2787 PetscFunctionReturn(0); 2788 } 2789 } 2790 2791 /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */ 2792 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2793 if (reuse == MAT_INITIAL_MATRIX) { 2794 /* create a local sequential matrix matseq[0] */ 2795 mloc_sub = PETSC_DECIDE; 2796 ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr); 2797 ierr = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr); 2798 rstart = rend - mloc_sub; 2799 ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr); 2800 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr); 2801 } else { /* reuse == MAT_REUSE_MATRIX */ 2802 if (subsize == 1) { 2803 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2804 redund = c->redundant; 2805 } else { 2806 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2807 redund = c->redundant; 2808 } 2809 2810 isrow = redund->isrow; 2811 iscol = redund->iscol; 2812 matseq = redund->matseq; 2813 } 2814 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr); 2815 ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr); 2816 2817 if (reuse == MAT_INITIAL_MATRIX) { 2818 /* create a supporting struct and attach it to C for reuse */ 2819 ierr = PetscNewLog(*matredundant,&redund);CHKERRQ(ierr); 2820 if (subsize == 1) { 2821 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2822 c->redundant = redund; 2823 } else { 2824 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2825 c->redundant = redund; 2826 } 2827 redund->isrow = isrow; 2828 redund->iscol = iscol; 2829 redund->matseq = matseq; 2830 redund->psubcomm = psubcomm; 2831 redund->Destroy = (*matredundant)->ops->destroy; 2832 (*matredundant)->ops->destroy = MatDestroy_MatRedundant; 2833 } 2834 PetscFunctionReturn(0); 2835 } 2836 2837 #undef __FUNCT__ 2838 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2839 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2840 { 2841 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2842 PetscErrorCode ierr; 2843 PetscInt i,*idxb = 0; 2844 PetscScalar *va,*vb; 2845 Vec vtmp; 2846 2847 PetscFunctionBegin; 2848 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2849 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2850 if (idx) { 2851 for (i=0; i<A->rmap->n; i++) { 2852 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2853 } 2854 } 2855 2856 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2857 if (idx) { 2858 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2859 } 2860 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2861 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2862 2863 for (i=0; i<A->rmap->n; i++) { 2864 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2865 va[i] = vb[i]; 2866 if (idx) idx[i] = a->garray[idxb[i]]; 2867 } 2868 } 2869 2870 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2871 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2872 ierr = PetscFree(idxb);CHKERRQ(ierr); 2873 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2874 PetscFunctionReturn(0); 2875 } 2876 2877 #undef __FUNCT__ 2878 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2879 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2880 { 2881 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2882 PetscErrorCode ierr; 2883 PetscInt i,*idxb = 0; 2884 PetscScalar *va,*vb; 2885 Vec vtmp; 2886 2887 PetscFunctionBegin; 2888 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2889 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2890 if (idx) { 2891 for (i=0; i<A->cmap->n; i++) { 2892 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2893 } 2894 } 2895 2896 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2897 if (idx) { 2898 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2899 } 2900 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2901 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2902 2903 for (i=0; i<A->rmap->n; i++) { 2904 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2905 va[i] = vb[i]; 2906 if (idx) idx[i] = a->garray[idxb[i]]; 2907 } 2908 } 2909 2910 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2911 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2912 ierr = PetscFree(idxb);CHKERRQ(ierr); 2913 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2914 PetscFunctionReturn(0); 2915 } 2916 2917 #undef __FUNCT__ 2918 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2919 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2920 { 2921 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2922 PetscInt n = A->rmap->n; 2923 PetscInt cstart = A->cmap->rstart; 2924 PetscInt *cmap = mat->garray; 2925 PetscInt *diagIdx, *offdiagIdx; 2926 Vec diagV, offdiagV; 2927 PetscScalar *a, *diagA, *offdiagA; 2928 PetscInt r; 2929 PetscErrorCode ierr; 2930 2931 PetscFunctionBegin; 2932 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2933 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2934 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 2935 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2936 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2937 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2938 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2939 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2940 for (r = 0; r < n; ++r) { 2941 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2942 a[r] = diagA[r]; 2943 idx[r] = cstart + diagIdx[r]; 2944 } else { 2945 a[r] = offdiagA[r]; 2946 idx[r] = cmap[offdiagIdx[r]]; 2947 } 2948 } 2949 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2950 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2951 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2952 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2953 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2954 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2955 PetscFunctionReturn(0); 2956 } 2957 2958 #undef __FUNCT__ 2959 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2960 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2961 { 2962 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2963 PetscInt n = A->rmap->n; 2964 PetscInt cstart = A->cmap->rstart; 2965 PetscInt *cmap = mat->garray; 2966 PetscInt *diagIdx, *offdiagIdx; 2967 Vec diagV, offdiagV; 2968 PetscScalar *a, *diagA, *offdiagA; 2969 PetscInt r; 2970 PetscErrorCode ierr; 2971 2972 PetscFunctionBegin; 2973 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2974 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2975 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2976 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2977 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2978 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2979 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2980 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2981 for (r = 0; r < n; ++r) { 2982 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2983 a[r] = diagA[r]; 2984 idx[r] = cstart + diagIdx[r]; 2985 } else { 2986 a[r] = offdiagA[r]; 2987 idx[r] = cmap[offdiagIdx[r]]; 2988 } 2989 } 2990 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2991 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2992 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2993 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2994 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2995 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2996 PetscFunctionReturn(0); 2997 } 2998 2999 #undef __FUNCT__ 3000 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 3001 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 3002 { 3003 PetscErrorCode ierr; 3004 Mat *dummy; 3005 3006 PetscFunctionBegin; 3007 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 3008 *newmat = *dummy; 3009 ierr = PetscFree(dummy);CHKERRQ(ierr); 3010 PetscFunctionReturn(0); 3011 } 3012 3013 #undef __FUNCT__ 3014 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3015 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3016 { 3017 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3018 PetscErrorCode ierr; 3019 3020 PetscFunctionBegin; 3021 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3022 PetscFunctionReturn(0); 3023 } 3024 3025 #undef __FUNCT__ 3026 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3027 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3028 { 3029 PetscErrorCode ierr; 3030 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3031 3032 PetscFunctionBegin; 3033 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3034 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3035 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3036 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3037 PetscFunctionReturn(0); 3038 } 3039 3040 /* -------------------------------------------------------------------*/ 3041 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3042 MatGetRow_MPIAIJ, 3043 MatRestoreRow_MPIAIJ, 3044 MatMult_MPIAIJ, 3045 /* 4*/ MatMultAdd_MPIAIJ, 3046 MatMultTranspose_MPIAIJ, 3047 MatMultTransposeAdd_MPIAIJ, 3048 #if defined(PETSC_HAVE_PBGL) 3049 MatSolve_MPIAIJ, 3050 #else 3051 0, 3052 #endif 3053 0, 3054 0, 3055 /*10*/ 0, 3056 0, 3057 0, 3058 MatSOR_MPIAIJ, 3059 MatTranspose_MPIAIJ, 3060 /*15*/ MatGetInfo_MPIAIJ, 3061 MatEqual_MPIAIJ, 3062 MatGetDiagonal_MPIAIJ, 3063 MatDiagonalScale_MPIAIJ, 3064 MatNorm_MPIAIJ, 3065 /*20*/ MatAssemblyBegin_MPIAIJ, 3066 MatAssemblyEnd_MPIAIJ, 3067 MatSetOption_MPIAIJ, 3068 MatZeroEntries_MPIAIJ, 3069 /*24*/ MatZeroRows_MPIAIJ, 3070 0, 3071 #if defined(PETSC_HAVE_PBGL) 3072 0, 3073 #else 3074 0, 3075 #endif 3076 0, 3077 0, 3078 /*29*/ MatSetUp_MPIAIJ, 3079 #if defined(PETSC_HAVE_PBGL) 3080 0, 3081 #else 3082 0, 3083 #endif 3084 0, 3085 0, 3086 0, 3087 /*34*/ MatDuplicate_MPIAIJ, 3088 0, 3089 0, 3090 0, 3091 0, 3092 /*39*/ MatAXPY_MPIAIJ, 3093 MatGetSubMatrices_MPIAIJ, 3094 MatIncreaseOverlap_MPIAIJ, 3095 MatGetValues_MPIAIJ, 3096 MatCopy_MPIAIJ, 3097 /*44*/ MatGetRowMax_MPIAIJ, 3098 MatScale_MPIAIJ, 3099 0, 3100 0, 3101 MatZeroRowsColumns_MPIAIJ, 3102 /*49*/ MatSetRandom_MPIAIJ, 3103 0, 3104 0, 3105 0, 3106 0, 3107 /*54*/ MatFDColoringCreate_MPIXAIJ, 3108 0, 3109 MatSetUnfactored_MPIAIJ, 3110 MatPermute_MPIAIJ, 3111 0, 3112 /*59*/ MatGetSubMatrix_MPIAIJ, 3113 MatDestroy_MPIAIJ, 3114 MatView_MPIAIJ, 3115 0, 3116 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 3117 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 3118 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 3119 0, 3120 0, 3121 0, 3122 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3123 MatGetRowMinAbs_MPIAIJ, 3124 0, 3125 MatSetColoring_MPIAIJ, 3126 0, 3127 MatSetValuesAdifor_MPIAIJ, 3128 /*75*/ MatFDColoringApply_AIJ, 3129 0, 3130 0, 3131 0, 3132 MatFindZeroDiagonals_MPIAIJ, 3133 /*80*/ 0, 3134 0, 3135 0, 3136 /*83*/ MatLoad_MPIAIJ, 3137 0, 3138 0, 3139 0, 3140 0, 3141 0, 3142 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3143 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3144 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3145 MatPtAP_MPIAIJ_MPIAIJ, 3146 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3147 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 3148 0, 3149 0, 3150 0, 3151 0, 3152 /*99*/ 0, 3153 0, 3154 0, 3155 MatConjugate_MPIAIJ, 3156 0, 3157 /*104*/MatSetValuesRow_MPIAIJ, 3158 MatRealPart_MPIAIJ, 3159 MatImaginaryPart_MPIAIJ, 3160 0, 3161 0, 3162 /*109*/0, 3163 MatGetRedundantMatrix_MPIAIJ, 3164 MatGetRowMin_MPIAIJ, 3165 0, 3166 0, 3167 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3168 0, 3169 0, 3170 0, 3171 0, 3172 /*119*/0, 3173 0, 3174 0, 3175 0, 3176 MatGetMultiProcBlock_MPIAIJ, 3177 /*124*/MatFindNonzeroRows_MPIAIJ, 3178 MatGetColumnNorms_MPIAIJ, 3179 MatInvertBlockDiagonal_MPIAIJ, 3180 0, 3181 MatGetSubMatricesParallel_MPIAIJ, 3182 /*129*/0, 3183 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3184 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3185 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3186 0, 3187 /*134*/0, 3188 0, 3189 0, 3190 0, 3191 0, 3192 /*139*/0, 3193 0, 3194 0, 3195 MatFDColoringSetUp_MPIXAIJ 3196 }; 3197 3198 /* ----------------------------------------------------------------------------------------*/ 3199 3200 #undef __FUNCT__ 3201 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3202 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3203 { 3204 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3205 PetscErrorCode ierr; 3206 3207 PetscFunctionBegin; 3208 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3209 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3210 PetscFunctionReturn(0); 3211 } 3212 3213 #undef __FUNCT__ 3214 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3215 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3216 { 3217 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3218 PetscErrorCode ierr; 3219 3220 PetscFunctionBegin; 3221 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3222 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3223 PetscFunctionReturn(0); 3224 } 3225 3226 #undef __FUNCT__ 3227 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3228 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3229 { 3230 Mat_MPIAIJ *b; 3231 PetscErrorCode ierr; 3232 3233 PetscFunctionBegin; 3234 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3235 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3236 b = (Mat_MPIAIJ*)B->data; 3237 3238 if (!B->preallocated) { 3239 /* Explicitly create 2 MATSEQAIJ matrices. */ 3240 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3241 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3242 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 3243 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3244 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 3245 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3246 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3247 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 3248 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3249 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 3250 } 3251 3252 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3253 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3254 B->preallocated = PETSC_TRUE; 3255 PetscFunctionReturn(0); 3256 } 3257 3258 #undef __FUNCT__ 3259 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3260 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3261 { 3262 Mat mat; 3263 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3264 PetscErrorCode ierr; 3265 3266 PetscFunctionBegin; 3267 *newmat = 0; 3268 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3269 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3270 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 3271 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3272 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3273 a = (Mat_MPIAIJ*)mat->data; 3274 3275 mat->factortype = matin->factortype; 3276 mat->assembled = PETSC_TRUE; 3277 mat->insertmode = NOT_SET_VALUES; 3278 mat->preallocated = PETSC_TRUE; 3279 3280 a->size = oldmat->size; 3281 a->rank = oldmat->rank; 3282 a->donotstash = oldmat->donotstash; 3283 a->roworiented = oldmat->roworiented; 3284 a->rowindices = 0; 3285 a->rowvalues = 0; 3286 a->getrowactive = PETSC_FALSE; 3287 3288 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3289 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3290 3291 if (oldmat->colmap) { 3292 #if defined(PETSC_USE_CTABLE) 3293 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3294 #else 3295 ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr); 3296 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3297 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3298 #endif 3299 } else a->colmap = 0; 3300 if (oldmat->garray) { 3301 PetscInt len; 3302 len = oldmat->B->cmap->n; 3303 ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr); 3304 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3305 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3306 } else a->garray = 0; 3307 3308 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3309 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 3310 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3311 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 3312 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3313 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 3314 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3315 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3316 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3317 *newmat = mat; 3318 PetscFunctionReturn(0); 3319 } 3320 3321 3322 3323 #undef __FUNCT__ 3324 #define __FUNCT__ "MatLoad_MPIAIJ" 3325 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3326 { 3327 PetscScalar *vals,*svals; 3328 MPI_Comm comm; 3329 PetscErrorCode ierr; 3330 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3331 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3332 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3333 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 3334 PetscInt cend,cstart,n,*rowners,sizesset=1; 3335 int fd; 3336 PetscInt bs = 1; 3337 3338 PetscFunctionBegin; 3339 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3340 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3341 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3342 if (!rank) { 3343 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3344 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3345 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3346 } 3347 3348 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 3349 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3350 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3351 3352 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3353 3354 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3355 M = header[1]; N = header[2]; 3356 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3357 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3358 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3359 3360 /* If global sizes are set, check if they are consistent with that given in the file */ 3361 if (sizesset) { 3362 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3363 } 3364 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3365 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3366 3367 /* determine ownership of all (block) rows */ 3368 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3369 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3370 else m = newMat->rmap->n; /* Set by user */ 3371 3372 ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr); 3373 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3374 3375 /* First process needs enough room for process with most rows */ 3376 if (!rank) { 3377 mmax = rowners[1]; 3378 for (i=2; i<=size; i++) { 3379 mmax = PetscMax(mmax, rowners[i]); 3380 } 3381 } else mmax = -1; /* unused, but compilers complain */ 3382 3383 rowners[0] = 0; 3384 for (i=2; i<=size; i++) { 3385 rowners[i] += rowners[i-1]; 3386 } 3387 rstart = rowners[rank]; 3388 rend = rowners[rank+1]; 3389 3390 /* distribute row lengths to all processors */ 3391 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 3392 if (!rank) { 3393 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3394 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 3395 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 3396 for (j=0; j<m; j++) { 3397 procsnz[0] += ourlens[j]; 3398 } 3399 for (i=1; i<size; i++) { 3400 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3401 /* calculate the number of nonzeros on each processor */ 3402 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3403 procsnz[i] += rowlengths[j]; 3404 } 3405 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3406 } 3407 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3408 } else { 3409 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3410 } 3411 3412 if (!rank) { 3413 /* determine max buffer needed and allocate it */ 3414 maxnz = 0; 3415 for (i=0; i<size; i++) { 3416 maxnz = PetscMax(maxnz,procsnz[i]); 3417 } 3418 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 3419 3420 /* read in my part of the matrix column indices */ 3421 nz = procsnz[0]; 3422 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3423 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3424 3425 /* read in every one elses and ship off */ 3426 for (i=1; i<size; i++) { 3427 nz = procsnz[i]; 3428 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3429 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3430 } 3431 ierr = PetscFree(cols);CHKERRQ(ierr); 3432 } else { 3433 /* determine buffer space needed for message */ 3434 nz = 0; 3435 for (i=0; i<m; i++) { 3436 nz += ourlens[i]; 3437 } 3438 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3439 3440 /* receive message of column indices*/ 3441 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3442 } 3443 3444 /* determine column ownership if matrix is not square */ 3445 if (N != M) { 3446 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3447 else n = newMat->cmap->n; 3448 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3449 cstart = cend - n; 3450 } else { 3451 cstart = rstart; 3452 cend = rend; 3453 n = cend - cstart; 3454 } 3455 3456 /* loop over local rows, determining number of off diagonal entries */ 3457 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3458 jj = 0; 3459 for (i=0; i<m; i++) { 3460 for (j=0; j<ourlens[i]; j++) { 3461 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3462 jj++; 3463 } 3464 } 3465 3466 for (i=0; i<m; i++) { 3467 ourlens[i] -= offlens[i]; 3468 } 3469 if (!sizesset) { 3470 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3471 } 3472 3473 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3474 3475 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3476 3477 for (i=0; i<m; i++) { 3478 ourlens[i] += offlens[i]; 3479 } 3480 3481 if (!rank) { 3482 ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr); 3483 3484 /* read in my part of the matrix numerical values */ 3485 nz = procsnz[0]; 3486 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3487 3488 /* insert into matrix */ 3489 jj = rstart; 3490 smycols = mycols; 3491 svals = vals; 3492 for (i=0; i<m; i++) { 3493 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3494 smycols += ourlens[i]; 3495 svals += ourlens[i]; 3496 jj++; 3497 } 3498 3499 /* read in other processors and ship out */ 3500 for (i=1; i<size; i++) { 3501 nz = procsnz[i]; 3502 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3503 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3504 } 3505 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3506 } else { 3507 /* receive numeric values */ 3508 ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr); 3509 3510 /* receive message of values*/ 3511 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3512 3513 /* insert into matrix */ 3514 jj = rstart; 3515 smycols = mycols; 3516 svals = vals; 3517 for (i=0; i<m; i++) { 3518 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3519 smycols += ourlens[i]; 3520 svals += ourlens[i]; 3521 jj++; 3522 } 3523 } 3524 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3525 ierr = PetscFree(vals);CHKERRQ(ierr); 3526 ierr = PetscFree(mycols);CHKERRQ(ierr); 3527 ierr = PetscFree(rowners);CHKERRQ(ierr); 3528 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3529 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3530 PetscFunctionReturn(0); 3531 } 3532 3533 #undef __FUNCT__ 3534 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3535 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3536 { 3537 PetscErrorCode ierr; 3538 IS iscol_local; 3539 PetscInt csize; 3540 3541 PetscFunctionBegin; 3542 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3543 if (call == MAT_REUSE_MATRIX) { 3544 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3545 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3546 } else { 3547 PetscInt cbs; 3548 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3549 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3550 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3551 } 3552 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3553 if (call == MAT_INITIAL_MATRIX) { 3554 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3555 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3556 } 3557 PetscFunctionReturn(0); 3558 } 3559 3560 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3561 #undef __FUNCT__ 3562 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3563 /* 3564 Not great since it makes two copies of the submatrix, first an SeqAIJ 3565 in local and then by concatenating the local matrices the end result. 3566 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3567 3568 Note: This requires a sequential iscol with all indices. 3569 */ 3570 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3571 { 3572 PetscErrorCode ierr; 3573 PetscMPIInt rank,size; 3574 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3575 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3576 PetscBool allcolumns, colflag; 3577 Mat M,Mreuse; 3578 MatScalar *vwork,*aa; 3579 MPI_Comm comm; 3580 Mat_SeqAIJ *aij; 3581 3582 PetscFunctionBegin; 3583 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3584 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3585 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3586 3587 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3588 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3589 if (colflag && ncol == mat->cmap->N) { 3590 allcolumns = PETSC_TRUE; 3591 } else { 3592 allcolumns = PETSC_FALSE; 3593 } 3594 if (call == MAT_REUSE_MATRIX) { 3595 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3596 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3597 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3598 } else { 3599 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3600 } 3601 3602 /* 3603 m - number of local rows 3604 n - number of columns (same on all processors) 3605 rstart - first row in new global matrix generated 3606 */ 3607 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3608 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3609 if (call == MAT_INITIAL_MATRIX) { 3610 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3611 ii = aij->i; 3612 jj = aij->j; 3613 3614 /* 3615 Determine the number of non-zeros in the diagonal and off-diagonal 3616 portions of the matrix in order to do correct preallocation 3617 */ 3618 3619 /* first get start and end of "diagonal" columns */ 3620 if (csize == PETSC_DECIDE) { 3621 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3622 if (mglobal == n) { /* square matrix */ 3623 nlocal = m; 3624 } else { 3625 nlocal = n/size + ((n % size) > rank); 3626 } 3627 } else { 3628 nlocal = csize; 3629 } 3630 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3631 rstart = rend - nlocal; 3632 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3633 3634 /* next, compute all the lengths */ 3635 ierr = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr); 3636 olens = dlens + m; 3637 for (i=0; i<m; i++) { 3638 jend = ii[i+1] - ii[i]; 3639 olen = 0; 3640 dlen = 0; 3641 for (j=0; j<jend; j++) { 3642 if (*jj < rstart || *jj >= rend) olen++; 3643 else dlen++; 3644 jj++; 3645 } 3646 olens[i] = olen; 3647 dlens[i] = dlen; 3648 } 3649 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3650 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3651 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3652 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3653 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3654 ierr = PetscFree(dlens);CHKERRQ(ierr); 3655 } else { 3656 PetscInt ml,nl; 3657 3658 M = *newmat; 3659 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3660 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3661 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3662 /* 3663 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3664 rather than the slower MatSetValues(). 3665 */ 3666 M->was_assembled = PETSC_TRUE; 3667 M->assembled = PETSC_FALSE; 3668 } 3669 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3670 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3671 ii = aij->i; 3672 jj = aij->j; 3673 aa = aij->a; 3674 for (i=0; i<m; i++) { 3675 row = rstart + i; 3676 nz = ii[i+1] - ii[i]; 3677 cwork = jj; jj += nz; 3678 vwork = aa; aa += nz; 3679 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3680 } 3681 3682 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3683 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3684 *newmat = M; 3685 3686 /* save submatrix used in processor for next request */ 3687 if (call == MAT_INITIAL_MATRIX) { 3688 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3689 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3690 } 3691 PetscFunctionReturn(0); 3692 } 3693 3694 #undef __FUNCT__ 3695 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3696 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3697 { 3698 PetscInt m,cstart, cend,j,nnz,i,d; 3699 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3700 const PetscInt *JJ; 3701 PetscScalar *values; 3702 PetscErrorCode ierr; 3703 3704 PetscFunctionBegin; 3705 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3706 3707 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3708 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3709 m = B->rmap->n; 3710 cstart = B->cmap->rstart; 3711 cend = B->cmap->rend; 3712 rstart = B->rmap->rstart; 3713 3714 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3715 3716 #if defined(PETSC_USE_DEBUGGING) 3717 for (i=0; i<m; i++) { 3718 nnz = Ii[i+1]- Ii[i]; 3719 JJ = J + Ii[i]; 3720 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3721 if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 3722 if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3723 } 3724 #endif 3725 3726 for (i=0; i<m; i++) { 3727 nnz = Ii[i+1]- Ii[i]; 3728 JJ = J + Ii[i]; 3729 nnz_max = PetscMax(nnz_max,nnz); 3730 d = 0; 3731 for (j=0; j<nnz; j++) { 3732 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3733 } 3734 d_nnz[i] = d; 3735 o_nnz[i] = nnz - d; 3736 } 3737 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3738 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3739 3740 if (v) values = (PetscScalar*)v; 3741 else { 3742 ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr); 3743 } 3744 3745 for (i=0; i<m; i++) { 3746 ii = i + rstart; 3747 nnz = Ii[i+1]- Ii[i]; 3748 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3749 } 3750 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3751 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3752 3753 if (!v) { 3754 ierr = PetscFree(values);CHKERRQ(ierr); 3755 } 3756 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3757 PetscFunctionReturn(0); 3758 } 3759 3760 #undef __FUNCT__ 3761 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3762 /*@ 3763 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3764 (the default parallel PETSc format). 3765 3766 Collective on MPI_Comm 3767 3768 Input Parameters: 3769 + B - the matrix 3770 . i - the indices into j for the start of each local row (starts with zero) 3771 . j - the column indices for each local row (starts with zero) 3772 - v - optional values in the matrix 3773 3774 Level: developer 3775 3776 Notes: 3777 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3778 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3779 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3780 3781 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3782 3783 The format which is used for the sparse matrix input, is equivalent to a 3784 row-major ordering.. i.e for the following matrix, the input data expected is 3785 as shown: 3786 3787 1 0 0 3788 2 0 3 P0 3789 ------- 3790 4 5 6 P1 3791 3792 Process0 [P0]: rows_owned=[0,1] 3793 i = {0,1,3} [size = nrow+1 = 2+1] 3794 j = {0,0,2} [size = nz = 6] 3795 v = {1,2,3} [size = nz = 6] 3796 3797 Process1 [P1]: rows_owned=[2] 3798 i = {0,3} [size = nrow+1 = 1+1] 3799 j = {0,1,2} [size = nz = 6] 3800 v = {4,5,6} [size = nz = 6] 3801 3802 .keywords: matrix, aij, compressed row, sparse, parallel 3803 3804 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3805 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3806 @*/ 3807 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3808 { 3809 PetscErrorCode ierr; 3810 3811 PetscFunctionBegin; 3812 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3813 PetscFunctionReturn(0); 3814 } 3815 3816 #undef __FUNCT__ 3817 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3818 /*@C 3819 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3820 (the default parallel PETSc format). For good matrix assembly performance 3821 the user should preallocate the matrix storage by setting the parameters 3822 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3823 performance can be increased by more than a factor of 50. 3824 3825 Collective on MPI_Comm 3826 3827 Input Parameters: 3828 + A - the matrix 3829 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3830 (same value is used for all local rows) 3831 . d_nnz - array containing the number of nonzeros in the various rows of the 3832 DIAGONAL portion of the local submatrix (possibly different for each row) 3833 or NULL, if d_nz is used to specify the nonzero structure. 3834 The size of this array is equal to the number of local rows, i.e 'm'. 3835 For matrices that will be factored, you must leave room for (and set) 3836 the diagonal entry even if it is zero. 3837 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3838 submatrix (same value is used for all local rows). 3839 - o_nnz - array containing the number of nonzeros in the various rows of the 3840 OFF-DIAGONAL portion of the local submatrix (possibly different for 3841 each row) or NULL, if o_nz is used to specify the nonzero 3842 structure. The size of this array is equal to the number 3843 of local rows, i.e 'm'. 3844 3845 If the *_nnz parameter is given then the *_nz parameter is ignored 3846 3847 The AIJ format (also called the Yale sparse matrix format or 3848 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3849 storage. The stored row and column indices begin with zero. 3850 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3851 3852 The parallel matrix is partitioned such that the first m0 rows belong to 3853 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3854 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3855 3856 The DIAGONAL portion of the local submatrix of a processor can be defined 3857 as the submatrix which is obtained by extraction the part corresponding to 3858 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3859 first row that belongs to the processor, r2 is the last row belonging to 3860 the this processor, and c1-c2 is range of indices of the local part of a 3861 vector suitable for applying the matrix to. This is an mxn matrix. In the 3862 common case of a square matrix, the row and column ranges are the same and 3863 the DIAGONAL part is also square. The remaining portion of the local 3864 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3865 3866 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3867 3868 You can call MatGetInfo() to get information on how effective the preallocation was; 3869 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3870 You can also run with the option -info and look for messages with the string 3871 malloc in them to see if additional memory allocation was needed. 3872 3873 Example usage: 3874 3875 Consider the following 8x8 matrix with 34 non-zero values, that is 3876 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3877 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3878 as follows: 3879 3880 .vb 3881 1 2 0 | 0 3 0 | 0 4 3882 Proc0 0 5 6 | 7 0 0 | 8 0 3883 9 0 10 | 11 0 0 | 12 0 3884 ------------------------------------- 3885 13 0 14 | 15 16 17 | 0 0 3886 Proc1 0 18 0 | 19 20 21 | 0 0 3887 0 0 0 | 22 23 0 | 24 0 3888 ------------------------------------- 3889 Proc2 25 26 27 | 0 0 28 | 29 0 3890 30 0 0 | 31 32 33 | 0 34 3891 .ve 3892 3893 This can be represented as a collection of submatrices as: 3894 3895 .vb 3896 A B C 3897 D E F 3898 G H I 3899 .ve 3900 3901 Where the submatrices A,B,C are owned by proc0, D,E,F are 3902 owned by proc1, G,H,I are owned by proc2. 3903 3904 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3905 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3906 The 'M','N' parameters are 8,8, and have the same values on all procs. 3907 3908 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3909 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3910 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3911 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3912 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3913 matrix, ans [DF] as another SeqAIJ matrix. 3914 3915 When d_nz, o_nz parameters are specified, d_nz storage elements are 3916 allocated for every row of the local diagonal submatrix, and o_nz 3917 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3918 One way to choose d_nz and o_nz is to use the max nonzerors per local 3919 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3920 In this case, the values of d_nz,o_nz are: 3921 .vb 3922 proc0 : dnz = 2, o_nz = 2 3923 proc1 : dnz = 3, o_nz = 2 3924 proc2 : dnz = 1, o_nz = 4 3925 .ve 3926 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3927 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3928 for proc3. i.e we are using 12+15+10=37 storage locations to store 3929 34 values. 3930 3931 When d_nnz, o_nnz parameters are specified, the storage is specified 3932 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3933 In the above case the values for d_nnz,o_nnz are: 3934 .vb 3935 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3936 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3937 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3938 .ve 3939 Here the space allocated is sum of all the above values i.e 34, and 3940 hence pre-allocation is perfect. 3941 3942 Level: intermediate 3943 3944 .keywords: matrix, aij, compressed row, sparse, parallel 3945 3946 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 3947 MPIAIJ, MatGetInfo(), PetscSplitOwnership() 3948 @*/ 3949 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3950 { 3951 PetscErrorCode ierr; 3952 3953 PetscFunctionBegin; 3954 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3955 PetscValidType(B,1); 3956 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3957 PetscFunctionReturn(0); 3958 } 3959 3960 #undef __FUNCT__ 3961 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3962 /*@ 3963 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3964 CSR format the local rows. 3965 3966 Collective on MPI_Comm 3967 3968 Input Parameters: 3969 + comm - MPI communicator 3970 . m - number of local rows (Cannot be PETSC_DECIDE) 3971 . n - This value should be the same as the local size used in creating the 3972 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3973 calculated if N is given) For square matrices n is almost always m. 3974 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3975 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3976 . i - row indices 3977 . j - column indices 3978 - a - matrix values 3979 3980 Output Parameter: 3981 . mat - the matrix 3982 3983 Level: intermediate 3984 3985 Notes: 3986 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3987 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3988 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3989 3990 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3991 3992 The format which is used for the sparse matrix input, is equivalent to a 3993 row-major ordering.. i.e for the following matrix, the input data expected is 3994 as shown: 3995 3996 1 0 0 3997 2 0 3 P0 3998 ------- 3999 4 5 6 P1 4000 4001 Process0 [P0]: rows_owned=[0,1] 4002 i = {0,1,3} [size = nrow+1 = 2+1] 4003 j = {0,0,2} [size = nz = 6] 4004 v = {1,2,3} [size = nz = 6] 4005 4006 Process1 [P1]: rows_owned=[2] 4007 i = {0,3} [size = nrow+1 = 1+1] 4008 j = {0,1,2} [size = nz = 6] 4009 v = {4,5,6} [size = nz = 6] 4010 4011 .keywords: matrix, aij, compressed row, sparse, parallel 4012 4013 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4014 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4015 @*/ 4016 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4017 { 4018 PetscErrorCode ierr; 4019 4020 PetscFunctionBegin; 4021 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4022 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4023 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4024 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4025 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4026 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4027 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4028 PetscFunctionReturn(0); 4029 } 4030 4031 #undef __FUNCT__ 4032 #define __FUNCT__ "MatCreateAIJ" 4033 /*@C 4034 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4035 (the default parallel PETSc format). For good matrix assembly performance 4036 the user should preallocate the matrix storage by setting the parameters 4037 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4038 performance can be increased by more than a factor of 50. 4039 4040 Collective on MPI_Comm 4041 4042 Input Parameters: 4043 + comm - MPI communicator 4044 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4045 This value should be the same as the local size used in creating the 4046 y vector for the matrix-vector product y = Ax. 4047 . n - This value should be the same as the local size used in creating the 4048 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4049 calculated if N is given) For square matrices n is almost always m. 4050 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4051 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4052 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4053 (same value is used for all local rows) 4054 . d_nnz - array containing the number of nonzeros in the various rows of the 4055 DIAGONAL portion of the local submatrix (possibly different for each row) 4056 or NULL, if d_nz is used to specify the nonzero structure. 4057 The size of this array is equal to the number of local rows, i.e 'm'. 4058 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4059 submatrix (same value is used for all local rows). 4060 - o_nnz - array containing the number of nonzeros in the various rows of the 4061 OFF-DIAGONAL portion of the local submatrix (possibly different for 4062 each row) or NULL, if o_nz is used to specify the nonzero 4063 structure. The size of this array is equal to the number 4064 of local rows, i.e 'm'. 4065 4066 Output Parameter: 4067 . A - the matrix 4068 4069 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4070 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4071 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4072 4073 Notes: 4074 If the *_nnz parameter is given then the *_nz parameter is ignored 4075 4076 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4077 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4078 storage requirements for this matrix. 4079 4080 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4081 processor than it must be used on all processors that share the object for 4082 that argument. 4083 4084 The user MUST specify either the local or global matrix dimensions 4085 (possibly both). 4086 4087 The parallel matrix is partitioned across processors such that the 4088 first m0 rows belong to process 0, the next m1 rows belong to 4089 process 1, the next m2 rows belong to process 2 etc.. where 4090 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4091 values corresponding to [m x N] submatrix. 4092 4093 The columns are logically partitioned with the n0 columns belonging 4094 to 0th partition, the next n1 columns belonging to the next 4095 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 4096 4097 The DIAGONAL portion of the local submatrix on any given processor 4098 is the submatrix corresponding to the rows and columns m,n 4099 corresponding to the given processor. i.e diagonal matrix on 4100 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4101 etc. The remaining portion of the local submatrix [m x (N-n)] 4102 constitute the OFF-DIAGONAL portion. The example below better 4103 illustrates this concept. 4104 4105 For a square global matrix we define each processor's diagonal portion 4106 to be its local rows and the corresponding columns (a square submatrix); 4107 each processor's off-diagonal portion encompasses the remainder of the 4108 local matrix (a rectangular submatrix). 4109 4110 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4111 4112 When calling this routine with a single process communicator, a matrix of 4113 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4114 type of communicator, use the construction mechanism: 4115 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4116 4117 By default, this format uses inodes (identical nodes) when possible. 4118 We search for consecutive rows with the same nonzero structure, thereby 4119 reusing matrix information to achieve increased efficiency. 4120 4121 Options Database Keys: 4122 + -mat_no_inode - Do not use inodes 4123 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4124 - -mat_aij_oneindex - Internally use indexing starting at 1 4125 rather than 0. Note that when calling MatSetValues(), 4126 the user still MUST index entries starting at 0! 4127 4128 4129 Example usage: 4130 4131 Consider the following 8x8 matrix with 34 non-zero values, that is 4132 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4133 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4134 as follows: 4135 4136 .vb 4137 1 2 0 | 0 3 0 | 0 4 4138 Proc0 0 5 6 | 7 0 0 | 8 0 4139 9 0 10 | 11 0 0 | 12 0 4140 ------------------------------------- 4141 13 0 14 | 15 16 17 | 0 0 4142 Proc1 0 18 0 | 19 20 21 | 0 0 4143 0 0 0 | 22 23 0 | 24 0 4144 ------------------------------------- 4145 Proc2 25 26 27 | 0 0 28 | 29 0 4146 30 0 0 | 31 32 33 | 0 34 4147 .ve 4148 4149 This can be represented as a collection of submatrices as: 4150 4151 .vb 4152 A B C 4153 D E F 4154 G H I 4155 .ve 4156 4157 Where the submatrices A,B,C are owned by proc0, D,E,F are 4158 owned by proc1, G,H,I are owned by proc2. 4159 4160 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4161 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4162 The 'M','N' parameters are 8,8, and have the same values on all procs. 4163 4164 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4165 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4166 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4167 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4168 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4169 matrix, ans [DF] as another SeqAIJ matrix. 4170 4171 When d_nz, o_nz parameters are specified, d_nz storage elements are 4172 allocated for every row of the local diagonal submatrix, and o_nz 4173 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4174 One way to choose d_nz and o_nz is to use the max nonzerors per local 4175 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4176 In this case, the values of d_nz,o_nz are: 4177 .vb 4178 proc0 : dnz = 2, o_nz = 2 4179 proc1 : dnz = 3, o_nz = 2 4180 proc2 : dnz = 1, o_nz = 4 4181 .ve 4182 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4183 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4184 for proc3. i.e we are using 12+15+10=37 storage locations to store 4185 34 values. 4186 4187 When d_nnz, o_nnz parameters are specified, the storage is specified 4188 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4189 In the above case the values for d_nnz,o_nnz are: 4190 .vb 4191 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4192 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4193 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4194 .ve 4195 Here the space allocated is sum of all the above values i.e 34, and 4196 hence pre-allocation is perfect. 4197 4198 Level: intermediate 4199 4200 .keywords: matrix, aij, compressed row, sparse, parallel 4201 4202 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4203 MPIAIJ, MatCreateMPIAIJWithArrays() 4204 @*/ 4205 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4206 { 4207 PetscErrorCode ierr; 4208 PetscMPIInt size; 4209 4210 PetscFunctionBegin; 4211 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4212 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4213 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4214 if (size > 1) { 4215 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4216 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4217 } else { 4218 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4219 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4220 } 4221 PetscFunctionReturn(0); 4222 } 4223 4224 #undef __FUNCT__ 4225 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4226 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4227 { 4228 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4229 4230 PetscFunctionBegin; 4231 *Ad = a->A; 4232 *Ao = a->B; 4233 *colmap = a->garray; 4234 PetscFunctionReturn(0); 4235 } 4236 4237 #undef __FUNCT__ 4238 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4239 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4240 { 4241 PetscErrorCode ierr; 4242 PetscInt i; 4243 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4244 4245 PetscFunctionBegin; 4246 if (coloring->ctype == IS_COLORING_GLOBAL) { 4247 ISColoringValue *allcolors,*colors; 4248 ISColoring ocoloring; 4249 4250 /* set coloring for diagonal portion */ 4251 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4252 4253 /* set coloring for off-diagonal portion */ 4254 ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 4255 ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr); 4256 for (i=0; i<a->B->cmap->n; i++) { 4257 colors[i] = allcolors[a->garray[i]]; 4258 } 4259 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4260 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4261 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4262 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4263 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4264 ISColoringValue *colors; 4265 PetscInt *larray; 4266 ISColoring ocoloring; 4267 4268 /* set coloring for diagonal portion */ 4269 ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr); 4270 for (i=0; i<a->A->cmap->n; i++) { 4271 larray[i] = i + A->cmap->rstart; 4272 } 4273 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 4274 ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr); 4275 for (i=0; i<a->A->cmap->n; i++) { 4276 colors[i] = coloring->colors[larray[i]]; 4277 } 4278 ierr = PetscFree(larray);CHKERRQ(ierr); 4279 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4280 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4281 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4282 4283 /* set coloring for off-diagonal portion */ 4284 ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr); 4285 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 4286 ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr); 4287 for (i=0; i<a->B->cmap->n; i++) { 4288 colors[i] = coloring->colors[larray[i]]; 4289 } 4290 ierr = PetscFree(larray);CHKERRQ(ierr); 4291 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4292 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4293 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4294 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4295 PetscFunctionReturn(0); 4296 } 4297 4298 #undef __FUNCT__ 4299 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4300 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4301 { 4302 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4303 PetscErrorCode ierr; 4304 4305 PetscFunctionBegin; 4306 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4307 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4308 PetscFunctionReturn(0); 4309 } 4310 4311 #undef __FUNCT__ 4312 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4313 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4314 { 4315 PetscErrorCode ierr; 4316 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4317 PetscInt *indx; 4318 4319 PetscFunctionBegin; 4320 /* This routine will ONLY return MPIAIJ type matrix */ 4321 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4322 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4323 if (n == PETSC_DECIDE) { 4324 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4325 } 4326 /* Check sum(n) = N */ 4327 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4328 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4329 4330 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4331 rstart -= m; 4332 4333 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4334 for (i=0; i<m; i++) { 4335 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4336 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4337 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4338 } 4339 4340 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4341 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4342 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4343 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4344 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4345 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4346 PetscFunctionReturn(0); 4347 } 4348 4349 #undef __FUNCT__ 4350 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4351 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4352 { 4353 PetscErrorCode ierr; 4354 PetscInt m,N,i,rstart,nnz,Ii; 4355 PetscInt *indx; 4356 PetscScalar *values; 4357 4358 PetscFunctionBegin; 4359 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4360 ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr); 4361 for (i=0; i<m; i++) { 4362 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4363 Ii = i + rstart; 4364 ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4365 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4366 } 4367 ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4368 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4369 PetscFunctionReturn(0); 4370 } 4371 4372 #undef __FUNCT__ 4373 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4374 /*@ 4375 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4376 matrices from each processor 4377 4378 Collective on MPI_Comm 4379 4380 Input Parameters: 4381 + comm - the communicators the parallel matrix will live on 4382 . inmat - the input sequential matrices 4383 . n - number of local columns (or PETSC_DECIDE) 4384 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4385 4386 Output Parameter: 4387 . outmat - the parallel matrix generated 4388 4389 Level: advanced 4390 4391 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4392 4393 @*/ 4394 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4395 { 4396 PetscErrorCode ierr; 4397 PetscMPIInt size; 4398 4399 PetscFunctionBegin; 4400 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4401 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4402 if (size == 1) { 4403 if (scall == MAT_INITIAL_MATRIX) { 4404 ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr); 4405 } else { 4406 ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4407 } 4408 } else { 4409 if (scall == MAT_INITIAL_MATRIX) { 4410 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4411 } 4412 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4413 } 4414 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4415 PetscFunctionReturn(0); 4416 } 4417 4418 #undef __FUNCT__ 4419 #define __FUNCT__ "MatFileSplit" 4420 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4421 { 4422 PetscErrorCode ierr; 4423 PetscMPIInt rank; 4424 PetscInt m,N,i,rstart,nnz; 4425 size_t len; 4426 const PetscInt *indx; 4427 PetscViewer out; 4428 char *name; 4429 Mat B; 4430 const PetscScalar *values; 4431 4432 PetscFunctionBegin; 4433 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4434 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4435 /* Should this be the type of the diagonal block of A? */ 4436 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4437 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4438 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4439 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4440 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4441 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4442 for (i=0; i<m; i++) { 4443 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4444 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4445 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4446 } 4447 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4448 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4449 4450 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4451 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4452 ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr); 4453 sprintf(name,"%s.%d",outfile,rank); 4454 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4455 ierr = PetscFree(name);CHKERRQ(ierr); 4456 ierr = MatView(B,out);CHKERRQ(ierr); 4457 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4458 ierr = MatDestroy(&B);CHKERRQ(ierr); 4459 PetscFunctionReturn(0); 4460 } 4461 4462 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4463 #undef __FUNCT__ 4464 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4465 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4466 { 4467 PetscErrorCode ierr; 4468 Mat_Merge_SeqsToMPI *merge; 4469 PetscContainer container; 4470 4471 PetscFunctionBegin; 4472 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4473 if (container) { 4474 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4475 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4476 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4477 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4478 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4479 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4480 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4481 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4482 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4483 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4484 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4485 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4486 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4487 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4488 ierr = PetscFree(merge);CHKERRQ(ierr); 4489 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4490 } 4491 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4492 PetscFunctionReturn(0); 4493 } 4494 4495 #include <../src/mat/utils/freespace.h> 4496 #include <petscbt.h> 4497 4498 #undef __FUNCT__ 4499 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4500 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4501 { 4502 PetscErrorCode ierr; 4503 MPI_Comm comm; 4504 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4505 PetscMPIInt size,rank,taga,*len_s; 4506 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4507 PetscInt proc,m; 4508 PetscInt **buf_ri,**buf_rj; 4509 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4510 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4511 MPI_Request *s_waits,*r_waits; 4512 MPI_Status *status; 4513 MatScalar *aa=a->a; 4514 MatScalar **abuf_r,*ba_i; 4515 Mat_Merge_SeqsToMPI *merge; 4516 PetscContainer container; 4517 4518 PetscFunctionBegin; 4519 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4520 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4521 4522 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4523 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4524 4525 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4526 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4527 4528 bi = merge->bi; 4529 bj = merge->bj; 4530 buf_ri = merge->buf_ri; 4531 buf_rj = merge->buf_rj; 4532 4533 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4534 owners = merge->rowmap->range; 4535 len_s = merge->len_s; 4536 4537 /* send and recv matrix values */ 4538 /*-----------------------------*/ 4539 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4540 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4541 4542 ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr); 4543 for (proc=0,k=0; proc<size; proc++) { 4544 if (!len_s[proc]) continue; 4545 i = owners[proc]; 4546 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4547 k++; 4548 } 4549 4550 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4551 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4552 ierr = PetscFree(status);CHKERRQ(ierr); 4553 4554 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4555 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4556 4557 /* insert mat values of mpimat */ 4558 /*----------------------------*/ 4559 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4560 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4561 4562 for (k=0; k<merge->nrecv; k++) { 4563 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4564 nrows = *(buf_ri_k[k]); 4565 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4566 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4567 } 4568 4569 /* set values of ba */ 4570 m = merge->rowmap->n; 4571 for (i=0; i<m; i++) { 4572 arow = owners[rank] + i; 4573 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4574 bnzi = bi[i+1] - bi[i]; 4575 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4576 4577 /* add local non-zero vals of this proc's seqmat into ba */ 4578 anzi = ai[arow+1] - ai[arow]; 4579 aj = a->j + ai[arow]; 4580 aa = a->a + ai[arow]; 4581 nextaj = 0; 4582 for (j=0; nextaj<anzi; j++) { 4583 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4584 ba_i[j] += aa[nextaj++]; 4585 } 4586 } 4587 4588 /* add received vals into ba */ 4589 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4590 /* i-th row */ 4591 if (i == *nextrow[k]) { 4592 anzi = *(nextai[k]+1) - *nextai[k]; 4593 aj = buf_rj[k] + *(nextai[k]); 4594 aa = abuf_r[k] + *(nextai[k]); 4595 nextaj = 0; 4596 for (j=0; nextaj<anzi; j++) { 4597 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4598 ba_i[j] += aa[nextaj++]; 4599 } 4600 } 4601 nextrow[k]++; nextai[k]++; 4602 } 4603 } 4604 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4605 } 4606 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4607 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4608 4609 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4610 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4611 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4612 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4613 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4614 PetscFunctionReturn(0); 4615 } 4616 4617 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4618 4619 #undef __FUNCT__ 4620 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4621 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4622 { 4623 PetscErrorCode ierr; 4624 Mat B_mpi; 4625 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4626 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4627 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4628 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4629 PetscInt len,proc,*dnz,*onz,bs,cbs; 4630 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4631 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4632 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4633 MPI_Status *status; 4634 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4635 PetscBT lnkbt; 4636 Mat_Merge_SeqsToMPI *merge; 4637 PetscContainer container; 4638 4639 PetscFunctionBegin; 4640 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4641 4642 /* make sure it is a PETSc comm */ 4643 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4644 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4645 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4646 4647 ierr = PetscNew(&merge);CHKERRQ(ierr); 4648 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4649 4650 /* determine row ownership */ 4651 /*---------------------------------------------------------*/ 4652 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4653 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4654 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4655 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4656 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4657 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4658 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4659 4660 m = merge->rowmap->n; 4661 owners = merge->rowmap->range; 4662 4663 /* determine the number of messages to send, their lengths */ 4664 /*---------------------------------------------------------*/ 4665 len_s = merge->len_s; 4666 4667 len = 0; /* length of buf_si[] */ 4668 merge->nsend = 0; 4669 for (proc=0; proc<size; proc++) { 4670 len_si[proc] = 0; 4671 if (proc == rank) { 4672 len_s[proc] = 0; 4673 } else { 4674 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4675 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4676 } 4677 if (len_s[proc]) { 4678 merge->nsend++; 4679 nrows = 0; 4680 for (i=owners[proc]; i<owners[proc+1]; i++) { 4681 if (ai[i+1] > ai[i]) nrows++; 4682 } 4683 len_si[proc] = 2*(nrows+1); 4684 len += len_si[proc]; 4685 } 4686 } 4687 4688 /* determine the number and length of messages to receive for ij-structure */ 4689 /*-------------------------------------------------------------------------*/ 4690 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4691 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4692 4693 /* post the Irecv of j-structure */ 4694 /*-------------------------------*/ 4695 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4696 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4697 4698 /* post the Isend of j-structure */ 4699 /*--------------------------------*/ 4700 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4701 4702 for (proc=0, k=0; proc<size; proc++) { 4703 if (!len_s[proc]) continue; 4704 i = owners[proc]; 4705 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4706 k++; 4707 } 4708 4709 /* receives and sends of j-structure are complete */ 4710 /*------------------------------------------------*/ 4711 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4712 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4713 4714 /* send and recv i-structure */ 4715 /*---------------------------*/ 4716 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4717 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4718 4719 ierr = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr); 4720 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4721 for (proc=0,k=0; proc<size; proc++) { 4722 if (!len_s[proc]) continue; 4723 /* form outgoing message for i-structure: 4724 buf_si[0]: nrows to be sent 4725 [1:nrows]: row index (global) 4726 [nrows+1:2*nrows+1]: i-structure index 4727 */ 4728 /*-------------------------------------------*/ 4729 nrows = len_si[proc]/2 - 1; 4730 buf_si_i = buf_si + nrows+1; 4731 buf_si[0] = nrows; 4732 buf_si_i[0] = 0; 4733 nrows = 0; 4734 for (i=owners[proc]; i<owners[proc+1]; i++) { 4735 anzi = ai[i+1] - ai[i]; 4736 if (anzi) { 4737 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4738 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4739 nrows++; 4740 } 4741 } 4742 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4743 k++; 4744 buf_si += len_si[proc]; 4745 } 4746 4747 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4748 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4749 4750 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4751 for (i=0; i<merge->nrecv; i++) { 4752 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4753 } 4754 4755 ierr = PetscFree(len_si);CHKERRQ(ierr); 4756 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4757 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4758 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4759 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4760 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4761 ierr = PetscFree(status);CHKERRQ(ierr); 4762 4763 /* compute a local seq matrix in each processor */ 4764 /*----------------------------------------------*/ 4765 /* allocate bi array and free space for accumulating nonzero column info */ 4766 ierr = PetscMalloc1((m+1),&bi);CHKERRQ(ierr); 4767 bi[0] = 0; 4768 4769 /* create and initialize a linked list */ 4770 nlnk = N+1; 4771 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4772 4773 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4774 len = ai[owners[rank+1]] - ai[owners[rank]]; 4775 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4776 4777 current_space = free_space; 4778 4779 /* determine symbolic info for each local row */ 4780 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4781 4782 for (k=0; k<merge->nrecv; k++) { 4783 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4784 nrows = *buf_ri_k[k]; 4785 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4786 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4787 } 4788 4789 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4790 len = 0; 4791 for (i=0; i<m; i++) { 4792 bnzi = 0; 4793 /* add local non-zero cols of this proc's seqmat into lnk */ 4794 arow = owners[rank] + i; 4795 anzi = ai[arow+1] - ai[arow]; 4796 aj = a->j + ai[arow]; 4797 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4798 bnzi += nlnk; 4799 /* add received col data into lnk */ 4800 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4801 if (i == *nextrow[k]) { /* i-th row */ 4802 anzi = *(nextai[k]+1) - *nextai[k]; 4803 aj = buf_rj[k] + *nextai[k]; 4804 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4805 bnzi += nlnk; 4806 nextrow[k]++; nextai[k]++; 4807 } 4808 } 4809 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4810 4811 /* if free space is not available, make more free space */ 4812 if (current_space->local_remaining<bnzi) { 4813 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4814 nspacedouble++; 4815 } 4816 /* copy data into free space, then initialize lnk */ 4817 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4818 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4819 4820 current_space->array += bnzi; 4821 current_space->local_used += bnzi; 4822 current_space->local_remaining -= bnzi; 4823 4824 bi[i+1] = bi[i] + bnzi; 4825 } 4826 4827 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4828 4829 ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr); 4830 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4831 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4832 4833 /* create symbolic parallel matrix B_mpi */ 4834 /*---------------------------------------*/ 4835 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4836 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4837 if (n==PETSC_DECIDE) { 4838 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4839 } else { 4840 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4841 } 4842 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4843 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4844 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4845 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4846 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4847 4848 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4849 B_mpi->assembled = PETSC_FALSE; 4850 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4851 merge->bi = bi; 4852 merge->bj = bj; 4853 merge->buf_ri = buf_ri; 4854 merge->buf_rj = buf_rj; 4855 merge->coi = NULL; 4856 merge->coj = NULL; 4857 merge->owners_co = NULL; 4858 4859 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4860 4861 /* attach the supporting struct to B_mpi for reuse */ 4862 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4863 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4864 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4865 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4866 *mpimat = B_mpi; 4867 4868 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4869 PetscFunctionReturn(0); 4870 } 4871 4872 #undef __FUNCT__ 4873 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4874 /*@C 4875 MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential 4876 matrices from each processor 4877 4878 Collective on MPI_Comm 4879 4880 Input Parameters: 4881 + comm - the communicators the parallel matrix will live on 4882 . seqmat - the input sequential matrices 4883 . m - number of local rows (or PETSC_DECIDE) 4884 . n - number of local columns (or PETSC_DECIDE) 4885 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4886 4887 Output Parameter: 4888 . mpimat - the parallel matrix generated 4889 4890 Level: advanced 4891 4892 Notes: 4893 The dimensions of the sequential matrix in each processor MUST be the same. 4894 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4895 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4896 @*/ 4897 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4898 { 4899 PetscErrorCode ierr; 4900 PetscMPIInt size; 4901 4902 PetscFunctionBegin; 4903 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4904 if (size == 1) { 4905 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4906 if (scall == MAT_INITIAL_MATRIX) { 4907 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4908 } else { 4909 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4910 } 4911 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4912 PetscFunctionReturn(0); 4913 } 4914 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4915 if (scall == MAT_INITIAL_MATRIX) { 4916 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4917 } 4918 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4919 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4920 PetscFunctionReturn(0); 4921 } 4922 4923 #undef __FUNCT__ 4924 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4925 /*@ 4926 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with 4927 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4928 with MatGetSize() 4929 4930 Not Collective 4931 4932 Input Parameters: 4933 + A - the matrix 4934 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4935 4936 Output Parameter: 4937 . A_loc - the local sequential matrix generated 4938 4939 Level: developer 4940 4941 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4942 4943 @*/ 4944 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4945 { 4946 PetscErrorCode ierr; 4947 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4948 Mat_SeqAIJ *mat,*a,*b; 4949 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4950 MatScalar *aa,*ba,*cam; 4951 PetscScalar *ca; 4952 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4953 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4954 PetscBool match; 4955 4956 PetscFunctionBegin; 4957 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4958 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4959 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4960 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4961 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4962 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4963 aa = a->a; ba = b->a; 4964 if (scall == MAT_INITIAL_MATRIX) { 4965 ierr = PetscMalloc1((1+am),&ci);CHKERRQ(ierr); 4966 ci[0] = 0; 4967 for (i=0; i<am; i++) { 4968 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4969 } 4970 ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr); 4971 ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr); 4972 k = 0; 4973 for (i=0; i<am; i++) { 4974 ncols_o = bi[i+1] - bi[i]; 4975 ncols_d = ai[i+1] - ai[i]; 4976 /* off-diagonal portion of A */ 4977 for (jo=0; jo<ncols_o; jo++) { 4978 col = cmap[*bj]; 4979 if (col >= cstart) break; 4980 cj[k] = col; bj++; 4981 ca[k++] = *ba++; 4982 } 4983 /* diagonal portion of A */ 4984 for (j=0; j<ncols_d; j++) { 4985 cj[k] = cstart + *aj++; 4986 ca[k++] = *aa++; 4987 } 4988 /* off-diagonal portion of A */ 4989 for (j=jo; j<ncols_o; j++) { 4990 cj[k] = cmap[*bj++]; 4991 ca[k++] = *ba++; 4992 } 4993 } 4994 /* put together the new matrix */ 4995 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4996 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4997 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4998 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4999 mat->free_a = PETSC_TRUE; 5000 mat->free_ij = PETSC_TRUE; 5001 mat->nonew = 0; 5002 } else if (scall == MAT_REUSE_MATRIX) { 5003 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5004 ci = mat->i; cj = mat->j; cam = mat->a; 5005 for (i=0; i<am; i++) { 5006 /* off-diagonal portion of A */ 5007 ncols_o = bi[i+1] - bi[i]; 5008 for (jo=0; jo<ncols_o; jo++) { 5009 col = cmap[*bj]; 5010 if (col >= cstart) break; 5011 *cam++ = *ba++; bj++; 5012 } 5013 /* diagonal portion of A */ 5014 ncols_d = ai[i+1] - ai[i]; 5015 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5016 /* off-diagonal portion of A */ 5017 for (j=jo; j<ncols_o; j++) { 5018 *cam++ = *ba++; bj++; 5019 } 5020 } 5021 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5022 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5023 PetscFunctionReturn(0); 5024 } 5025 5026 #undef __FUNCT__ 5027 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5028 /*@C 5029 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5030 5031 Not Collective 5032 5033 Input Parameters: 5034 + A - the matrix 5035 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5036 - row, col - index sets of rows and columns to extract (or NULL) 5037 5038 Output Parameter: 5039 . A_loc - the local sequential matrix generated 5040 5041 Level: developer 5042 5043 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5044 5045 @*/ 5046 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5047 { 5048 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5049 PetscErrorCode ierr; 5050 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5051 IS isrowa,iscola; 5052 Mat *aloc; 5053 PetscBool match; 5054 5055 PetscFunctionBegin; 5056 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5057 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5058 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5059 if (!row) { 5060 start = A->rmap->rstart; end = A->rmap->rend; 5061 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5062 } else { 5063 isrowa = *row; 5064 } 5065 if (!col) { 5066 start = A->cmap->rstart; 5067 cmap = a->garray; 5068 nzA = a->A->cmap->n; 5069 nzB = a->B->cmap->n; 5070 ierr = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr); 5071 ncols = 0; 5072 for (i=0; i<nzB; i++) { 5073 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5074 else break; 5075 } 5076 imark = i; 5077 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5078 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5079 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5080 } else { 5081 iscola = *col; 5082 } 5083 if (scall != MAT_INITIAL_MATRIX) { 5084 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5085 aloc[0] = *A_loc; 5086 } 5087 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5088 *A_loc = aloc[0]; 5089 ierr = PetscFree(aloc);CHKERRQ(ierr); 5090 if (!row) { 5091 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5092 } 5093 if (!col) { 5094 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5095 } 5096 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5097 PetscFunctionReturn(0); 5098 } 5099 5100 #undef __FUNCT__ 5101 #define __FUNCT__ "MatGetBrowsOfAcols" 5102 /*@C 5103 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5104 5105 Collective on Mat 5106 5107 Input Parameters: 5108 + A,B - the matrices in mpiaij format 5109 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5110 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5111 5112 Output Parameter: 5113 + rowb, colb - index sets of rows and columns of B to extract 5114 - B_seq - the sequential matrix generated 5115 5116 Level: developer 5117 5118 @*/ 5119 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5120 { 5121 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5122 PetscErrorCode ierr; 5123 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5124 IS isrowb,iscolb; 5125 Mat *bseq=NULL; 5126 5127 PetscFunctionBegin; 5128 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5129 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5130 } 5131 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5132 5133 if (scall == MAT_INITIAL_MATRIX) { 5134 start = A->cmap->rstart; 5135 cmap = a->garray; 5136 nzA = a->A->cmap->n; 5137 nzB = a->B->cmap->n; 5138 ierr = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr); 5139 ncols = 0; 5140 for (i=0; i<nzB; i++) { /* row < local row index */ 5141 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5142 else break; 5143 } 5144 imark = i; 5145 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5146 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5147 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5148 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5149 } else { 5150 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5151 isrowb = *rowb; iscolb = *colb; 5152 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5153 bseq[0] = *B_seq; 5154 } 5155 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5156 *B_seq = bseq[0]; 5157 ierr = PetscFree(bseq);CHKERRQ(ierr); 5158 if (!rowb) { 5159 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5160 } else { 5161 *rowb = isrowb; 5162 } 5163 if (!colb) { 5164 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5165 } else { 5166 *colb = iscolb; 5167 } 5168 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5169 PetscFunctionReturn(0); 5170 } 5171 5172 #undef __FUNCT__ 5173 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5174 /* 5175 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5176 of the OFF-DIAGONAL portion of local A 5177 5178 Collective on Mat 5179 5180 Input Parameters: 5181 + A,B - the matrices in mpiaij format 5182 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5183 5184 Output Parameter: 5185 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5186 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5187 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5188 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5189 5190 Level: developer 5191 5192 */ 5193 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5194 { 5195 VecScatter_MPI_General *gen_to,*gen_from; 5196 PetscErrorCode ierr; 5197 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5198 Mat_SeqAIJ *b_oth; 5199 VecScatter ctx =a->Mvctx; 5200 MPI_Comm comm; 5201 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5202 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5203 PetscScalar *rvalues,*svalues; 5204 MatScalar *b_otha,*bufa,*bufA; 5205 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5206 MPI_Request *rwaits = NULL,*swaits = NULL; 5207 MPI_Status *sstatus,rstatus; 5208 PetscMPIInt jj; 5209 PetscInt *cols,sbs,rbs; 5210 PetscScalar *vals; 5211 5212 PetscFunctionBegin; 5213 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5214 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5215 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5216 } 5217 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5218 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5219 5220 gen_to = (VecScatter_MPI_General*)ctx->todata; 5221 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5222 rvalues = gen_from->values; /* holds the length of receiving row */ 5223 svalues = gen_to->values; /* holds the length of sending row */ 5224 nrecvs = gen_from->n; 5225 nsends = gen_to->n; 5226 5227 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5228 srow = gen_to->indices; /* local row index to be sent */ 5229 sstarts = gen_to->starts; 5230 sprocs = gen_to->procs; 5231 sstatus = gen_to->sstatus; 5232 sbs = gen_to->bs; 5233 rstarts = gen_from->starts; 5234 rprocs = gen_from->procs; 5235 rbs = gen_from->bs; 5236 5237 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5238 if (scall == MAT_INITIAL_MATRIX) { 5239 /* i-array */ 5240 /*---------*/ 5241 /* post receives */ 5242 for (i=0; i<nrecvs; i++) { 5243 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5244 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5245 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5246 } 5247 5248 /* pack the outgoing message */ 5249 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5250 5251 sstartsj[0] = 0; 5252 rstartsj[0] = 0; 5253 len = 0; /* total length of j or a array to be sent */ 5254 k = 0; 5255 for (i=0; i<nsends; i++) { 5256 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5257 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5258 for (j=0; j<nrows; j++) { 5259 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5260 for (l=0; l<sbs; l++) { 5261 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5262 5263 rowlen[j*sbs+l] = ncols; 5264 5265 len += ncols; 5266 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5267 } 5268 k++; 5269 } 5270 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5271 5272 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5273 } 5274 /* recvs and sends of i-array are completed */ 5275 i = nrecvs; 5276 while (i--) { 5277 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5278 } 5279 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5280 5281 /* allocate buffers for sending j and a arrays */ 5282 ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr); 5283 ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr); 5284 5285 /* create i-array of B_oth */ 5286 ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr); 5287 5288 b_othi[0] = 0; 5289 len = 0; /* total length of j or a array to be received */ 5290 k = 0; 5291 for (i=0; i<nrecvs; i++) { 5292 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5293 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5294 for (j=0; j<nrows; j++) { 5295 b_othi[k+1] = b_othi[k] + rowlen[j]; 5296 len += rowlen[j]; k++; 5297 } 5298 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5299 } 5300 5301 /* allocate space for j and a arrrays of B_oth */ 5302 ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr); 5303 ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr); 5304 5305 /* j-array */ 5306 /*---------*/ 5307 /* post receives of j-array */ 5308 for (i=0; i<nrecvs; i++) { 5309 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5310 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5311 } 5312 5313 /* pack the outgoing message j-array */ 5314 k = 0; 5315 for (i=0; i<nsends; i++) { 5316 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5317 bufJ = bufj+sstartsj[i]; 5318 for (j=0; j<nrows; j++) { 5319 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5320 for (ll=0; ll<sbs; ll++) { 5321 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5322 for (l=0; l<ncols; l++) { 5323 *bufJ++ = cols[l]; 5324 } 5325 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5326 } 5327 } 5328 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5329 } 5330 5331 /* recvs and sends of j-array are completed */ 5332 i = nrecvs; 5333 while (i--) { 5334 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5335 } 5336 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5337 } else if (scall == MAT_REUSE_MATRIX) { 5338 sstartsj = *startsj_s; 5339 rstartsj = *startsj_r; 5340 bufa = *bufa_ptr; 5341 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5342 b_otha = b_oth->a; 5343 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5344 5345 /* a-array */ 5346 /*---------*/ 5347 /* post receives of a-array */ 5348 for (i=0; i<nrecvs; i++) { 5349 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5350 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5351 } 5352 5353 /* pack the outgoing message a-array */ 5354 k = 0; 5355 for (i=0; i<nsends; i++) { 5356 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5357 bufA = bufa+sstartsj[i]; 5358 for (j=0; j<nrows; j++) { 5359 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5360 for (ll=0; ll<sbs; ll++) { 5361 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5362 for (l=0; l<ncols; l++) { 5363 *bufA++ = vals[l]; 5364 } 5365 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5366 } 5367 } 5368 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5369 } 5370 /* recvs and sends of a-array are completed */ 5371 i = nrecvs; 5372 while (i--) { 5373 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5374 } 5375 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5376 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5377 5378 if (scall == MAT_INITIAL_MATRIX) { 5379 /* put together the new matrix */ 5380 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5381 5382 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5383 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5384 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5385 b_oth->free_a = PETSC_TRUE; 5386 b_oth->free_ij = PETSC_TRUE; 5387 b_oth->nonew = 0; 5388 5389 ierr = PetscFree(bufj);CHKERRQ(ierr); 5390 if (!startsj_s || !bufa_ptr) { 5391 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5392 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5393 } else { 5394 *startsj_s = sstartsj; 5395 *startsj_r = rstartsj; 5396 *bufa_ptr = bufa; 5397 } 5398 } 5399 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5400 PetscFunctionReturn(0); 5401 } 5402 5403 #undef __FUNCT__ 5404 #define __FUNCT__ "MatGetCommunicationStructs" 5405 /*@C 5406 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5407 5408 Not Collective 5409 5410 Input Parameters: 5411 . A - The matrix in mpiaij format 5412 5413 Output Parameter: 5414 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5415 . colmap - A map from global column index to local index into lvec 5416 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5417 5418 Level: developer 5419 5420 @*/ 5421 #if defined(PETSC_USE_CTABLE) 5422 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5423 #else 5424 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5425 #endif 5426 { 5427 Mat_MPIAIJ *a; 5428 5429 PetscFunctionBegin; 5430 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5431 PetscValidPointer(lvec, 2); 5432 PetscValidPointer(colmap, 3); 5433 PetscValidPointer(multScatter, 4); 5434 a = (Mat_MPIAIJ*) A->data; 5435 if (lvec) *lvec = a->lvec; 5436 if (colmap) *colmap = a->colmap; 5437 if (multScatter) *multScatter = a->Mvctx; 5438 PetscFunctionReturn(0); 5439 } 5440 5441 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5442 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5443 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5444 5445 #undef __FUNCT__ 5446 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5447 /* 5448 Computes (B'*A')' since computing B*A directly is untenable 5449 5450 n p p 5451 ( ) ( ) ( ) 5452 m ( A ) * n ( B ) = m ( C ) 5453 ( ) ( ) ( ) 5454 5455 */ 5456 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5457 { 5458 PetscErrorCode ierr; 5459 Mat At,Bt,Ct; 5460 5461 PetscFunctionBegin; 5462 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5463 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5464 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5465 ierr = MatDestroy(&At);CHKERRQ(ierr); 5466 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5467 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5468 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5469 PetscFunctionReturn(0); 5470 } 5471 5472 #undef __FUNCT__ 5473 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5474 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5475 { 5476 PetscErrorCode ierr; 5477 PetscInt m=A->rmap->n,n=B->cmap->n; 5478 Mat Cmat; 5479 5480 PetscFunctionBegin; 5481 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5482 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5483 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5484 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5485 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5486 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5487 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5488 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5489 5490 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5491 5492 *C = Cmat; 5493 PetscFunctionReturn(0); 5494 } 5495 5496 /* ----------------------------------------------------------------*/ 5497 #undef __FUNCT__ 5498 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5499 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5500 { 5501 PetscErrorCode ierr; 5502 5503 PetscFunctionBegin; 5504 if (scall == MAT_INITIAL_MATRIX) { 5505 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5506 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5507 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5508 } 5509 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5510 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5511 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5512 PetscFunctionReturn(0); 5513 } 5514 5515 #if defined(PETSC_HAVE_MUMPS) 5516 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5517 #endif 5518 #if defined(PETSC_HAVE_PASTIX) 5519 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5520 #endif 5521 #if defined(PETSC_HAVE_SUPERLU_DIST) 5522 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5523 #endif 5524 #if defined(PETSC_HAVE_CLIQUE) 5525 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5526 #endif 5527 5528 /*MC 5529 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5530 5531 Options Database Keys: 5532 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5533 5534 Level: beginner 5535 5536 .seealso: MatCreateAIJ() 5537 M*/ 5538 5539 #undef __FUNCT__ 5540 #define __FUNCT__ "MatCreate_MPIAIJ" 5541 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5542 { 5543 Mat_MPIAIJ *b; 5544 PetscErrorCode ierr; 5545 PetscMPIInt size; 5546 5547 PetscFunctionBegin; 5548 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5549 5550 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5551 B->data = (void*)b; 5552 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5553 B->assembled = PETSC_FALSE; 5554 B->insertmode = NOT_SET_VALUES; 5555 b->size = size; 5556 5557 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5558 5559 /* build cache for off array entries formed */ 5560 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5561 5562 b->donotstash = PETSC_FALSE; 5563 b->colmap = 0; 5564 b->garray = 0; 5565 b->roworiented = PETSC_TRUE; 5566 5567 /* stuff used for matrix vector multiply */ 5568 b->lvec = NULL; 5569 b->Mvctx = NULL; 5570 5571 /* stuff for MatGetRow() */ 5572 b->rowindices = 0; 5573 b->rowvalues = 0; 5574 b->getrowactive = PETSC_FALSE; 5575 5576 /* flexible pointer used in CUSP/CUSPARSE classes */ 5577 b->spptr = NULL; 5578 5579 #if defined(PETSC_HAVE_MUMPS) 5580 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr); 5581 #endif 5582 #if defined(PETSC_HAVE_PASTIX) 5583 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5584 #endif 5585 #if defined(PETSC_HAVE_SUPERLU_DIST) 5586 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5587 #endif 5588 #if defined(PETSC_HAVE_CLIQUE) 5589 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr); 5590 #endif 5591 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5592 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5593 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5594 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5595 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5596 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5597 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5598 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5599 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5600 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5601 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5602 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5603 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5604 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5605 PetscFunctionReturn(0); 5606 } 5607 5608 #undef __FUNCT__ 5609 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5610 /*@ 5611 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5612 and "off-diagonal" part of the matrix in CSR format. 5613 5614 Collective on MPI_Comm 5615 5616 Input Parameters: 5617 + comm - MPI communicator 5618 . m - number of local rows (Cannot be PETSC_DECIDE) 5619 . n - This value should be the same as the local size used in creating the 5620 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5621 calculated if N is given) For square matrices n is almost always m. 5622 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5623 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5624 . i - row indices for "diagonal" portion of matrix 5625 . j - column indices 5626 . a - matrix values 5627 . oi - row indices for "off-diagonal" portion of matrix 5628 . oj - column indices 5629 - oa - matrix values 5630 5631 Output Parameter: 5632 . mat - the matrix 5633 5634 Level: advanced 5635 5636 Notes: 5637 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5638 must free the arrays once the matrix has been destroyed and not before. 5639 5640 The i and j indices are 0 based 5641 5642 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5643 5644 This sets local rows and cannot be used to set off-processor values. 5645 5646 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5647 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5648 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5649 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5650 keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5651 communication if it is known that only local entries will be set. 5652 5653 .keywords: matrix, aij, compressed row, sparse, parallel 5654 5655 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5656 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5657 @*/ 5658 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5659 { 5660 PetscErrorCode ierr; 5661 Mat_MPIAIJ *maij; 5662 5663 PetscFunctionBegin; 5664 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5665 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5666 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5667 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5668 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5669 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5670 maij = (Mat_MPIAIJ*) (*mat)->data; 5671 5672 (*mat)->preallocated = PETSC_TRUE; 5673 5674 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5675 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5676 5677 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5678 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5679 5680 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5681 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5682 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5683 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5684 5685 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5686 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5687 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5688 PetscFunctionReturn(0); 5689 } 5690 5691 /* 5692 Special version for direct calls from Fortran 5693 */ 5694 #include <petsc-private/fortranimpl.h> 5695 5696 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5697 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5698 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5699 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5700 #endif 5701 5702 /* Change these macros so can be used in void function */ 5703 #undef CHKERRQ 5704 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5705 #undef SETERRQ2 5706 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5707 #undef SETERRQ3 5708 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5709 #undef SETERRQ 5710 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5711 5712 #undef __FUNCT__ 5713 #define __FUNCT__ "matsetvaluesmpiaij_" 5714 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5715 { 5716 Mat mat = *mmat; 5717 PetscInt m = *mm, n = *mn; 5718 InsertMode addv = *maddv; 5719 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5720 PetscScalar value; 5721 PetscErrorCode ierr; 5722 5723 MatCheckPreallocated(mat,1); 5724 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5725 5726 #if defined(PETSC_USE_DEBUG) 5727 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5728 #endif 5729 { 5730 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5731 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5732 PetscBool roworiented = aij->roworiented; 5733 5734 /* Some Variables required in the macro */ 5735 Mat A = aij->A; 5736 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5737 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5738 MatScalar *aa = a->a; 5739 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5740 Mat B = aij->B; 5741 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5742 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5743 MatScalar *ba = b->a; 5744 5745 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5746 PetscInt nonew = a->nonew; 5747 MatScalar *ap1,*ap2; 5748 5749 PetscFunctionBegin; 5750 for (i=0; i<m; i++) { 5751 if (im[i] < 0) continue; 5752 #if defined(PETSC_USE_DEBUG) 5753 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5754 #endif 5755 if (im[i] >= rstart && im[i] < rend) { 5756 row = im[i] - rstart; 5757 lastcol1 = -1; 5758 rp1 = aj + ai[row]; 5759 ap1 = aa + ai[row]; 5760 rmax1 = aimax[row]; 5761 nrow1 = ailen[row]; 5762 low1 = 0; 5763 high1 = nrow1; 5764 lastcol2 = -1; 5765 rp2 = bj + bi[row]; 5766 ap2 = ba + bi[row]; 5767 rmax2 = bimax[row]; 5768 nrow2 = bilen[row]; 5769 low2 = 0; 5770 high2 = nrow2; 5771 5772 for (j=0; j<n; j++) { 5773 if (roworiented) value = v[i*n+j]; 5774 else value = v[i+j*m]; 5775 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5776 if (in[j] >= cstart && in[j] < cend) { 5777 col = in[j] - cstart; 5778 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5779 } else if (in[j] < 0) continue; 5780 #if defined(PETSC_USE_DEBUG) 5781 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5782 #endif 5783 else { 5784 if (mat->was_assembled) { 5785 if (!aij->colmap) { 5786 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5787 } 5788 #if defined(PETSC_USE_CTABLE) 5789 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5790 col--; 5791 #else 5792 col = aij->colmap[in[j]] - 1; 5793 #endif 5794 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5795 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5796 col = in[j]; 5797 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5798 B = aij->B; 5799 b = (Mat_SeqAIJ*)B->data; 5800 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5801 rp2 = bj + bi[row]; 5802 ap2 = ba + bi[row]; 5803 rmax2 = bimax[row]; 5804 nrow2 = bilen[row]; 5805 low2 = 0; 5806 high2 = nrow2; 5807 bm = aij->B->rmap->n; 5808 ba = b->a; 5809 } 5810 } else col = in[j]; 5811 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5812 } 5813 } 5814 } else if (!aij->donotstash) { 5815 if (roworiented) { 5816 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5817 } else { 5818 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5819 } 5820 } 5821 } 5822 } 5823 PetscFunctionReturnVoid(); 5824 } 5825 5826