#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petscblaslapack.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

  Developer Notes: This type automatically switches over to use inodes when enough of them exist.
  Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/
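/*
   Illustrative usage sketch (added commentary, not part of the original source): creating an AIJ
   matrix and calling both preallocation routines as recommended above.  The sizes m, n and the
   nonzero estimates are placeholders chosen only for the example.

     Mat A;
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,m,n);
     MatSetType(A,MATAIJ);
     MatSetFromOptions(A);
     MatSeqAIJSetPreallocation(A,5,PETSC_NULL);               used when the communicator has one process
     MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);  used when it has several processes
*/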
#undef __FUNCT__
#define __FUNCT__ "MatFindNonZeroRows_MPIAIJ"
PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
    ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    ok2:;
  }
  ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
  ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
  Distributes a SeqAIJ matrix across a set of processes. Code stolen from
  MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
  Only for square matrices
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,gmat->rmap->bs,gmat->cmap->bs);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal entries per row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal entries per row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];  ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i];  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                    ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.
  When PETSC_USE_CTABLE is used this is scalable at a slightly higher hash table
  cost; without it, it is not scalable (each process has an order-N integer array)
  but is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col;  \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}


#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col;\
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}
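/*
   Added commentary (not in the original source): MatSetValues_SeqAIJ_A_Private() and
   MatSetValues_SeqAIJ_B_Private() above are textual macros, not functions.  Each one
   binary-searches the current row (rp1/rp2 hold the column indices, ap1/ap2 the values)
   for the requested column, adds to or overwrites the value if the column is found, and
   otherwise shifts the tail of the row up one slot to insert the new nonzero, reallocating
   the row via MatSeqXAIJReallocateAIJ() when it is full.  They rely on the local variables
   that MatSetValues_MPIAIJ() below sets up for the diagonal (A) and off-diagonal (B) blocks.
*/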
"MatSetValuesRow_MPIAIJ" 426 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 427 { 428 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 429 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 430 PetscErrorCode ierr; 431 PetscInt l,*garray = mat->garray,diag; 432 433 PetscFunctionBegin; 434 /* code only works for square matrices A */ 435 436 /* find size of row to the left of the diagonal part */ 437 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 438 row = row - diag; 439 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 440 if (garray[b->j[b->i[row]+l]] > diag) break; 441 } 442 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 443 444 /* diagonal part */ 445 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 446 447 /* right of diagonal part */ 448 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 449 PetscFunctionReturn(0); 450 } 451 452 #undef __FUNCT__ 453 #define __FUNCT__ "MatSetValues_MPIAIJ" 454 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 455 { 456 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 457 PetscScalar value; 458 PetscErrorCode ierr; 459 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 460 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 461 PetscBool roworiented = aij->roworiented; 462 463 /* Some Variables required in the macro */ 464 Mat A = aij->A; 465 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 466 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 467 MatScalar *aa = a->a; 468 PetscBool ignorezeroentries = a->ignorezeroentries; 469 Mat B = aij->B; 470 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 471 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 472 MatScalar *ba = b->a; 473 474 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 475 PetscInt nonew; 476 MatScalar *ap1,*ap2; 477 478 PetscFunctionBegin; 479 if (v) PetscValidScalarPointer(v,6); 480 for (i=0; i<m; i++) { 481 if (im[i] < 0) continue; 482 #if defined(PETSC_USE_DEBUG) 483 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 484 #endif 485 if (im[i] >= rstart && im[i] < rend) { 486 row = im[i] - rstart; 487 lastcol1 = -1; 488 rp1 = aj + ai[row]; 489 ap1 = aa + ai[row]; 490 rmax1 = aimax[row]; 491 nrow1 = ailen[row]; 492 low1 = 0; 493 high1 = nrow1; 494 lastcol2 = -1; 495 rp2 = bj + bi[row]; 496 ap2 = ba + bi[row]; 497 rmax2 = bimax[row]; 498 nrow2 = bilen[row]; 499 low2 = 0; 500 high2 = nrow2; 501 502 for (j=0; j<n; j++) { 503 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 504 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 505 if (in[j] >= cstart && in[j] < cend){ 506 col = in[j] - cstart; 507 nonew = a->nonew; 508 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 509 } else if (in[j] < 0) continue; 510 #if defined(PETSC_USE_DEBUG) 511 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 512 #endif 513 else { 514 if (mat->was_assembled) { 515 if (!aij->colmap) { 516 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 517 } 518 #if defined (PETSC_USE_CTABLE) 519 
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            }
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

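  /* Added commentary: entries destined for rows owned by other processes were collected in
     mat->stash by MatSetValues_MPIAIJ(); this routine only starts the stash scatter that ships
     them to their owners.  The actual insertion happens in MatAssemblyEnd_MPIAIJ(). */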
  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends,row;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
  MPI_Comm          comm = ((PetscObject)A)->comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb;
#if defined(PETSC_DEBUG)
  PetscBool found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j    = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i]      = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  if (A->nooffproczerorows) {
    if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
    nrecvs = nsends;
    nmax   = N;
  } else {
    /* inform other processors of number of messages and max length */
    ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
  }

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen         += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal entry, allocating
     new memory, thus calling lots of mallocs and slowing things down.
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,m;
  MPI_Comm          comm = ((PetscObject)A)->comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;
#if defined(PETSC_DEBUG)
  PetscBool found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j    = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i]      = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen         += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);
  /* lrows are the local rows to be zeroed, slen is the number of local rows */

  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);

  /* handle off diagonal part of matrix */
  ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<slen; i++) {
    bb[lrows[i]] = 1;
  }
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);

  /* remove zeroed rows of off diagonal matrix */
  ii = aij->i;
  for (i=0; i<slen; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  /* loop over all elements of off process part of matrix zeroing removed columns */
  if (aij->compressedrow.use){
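    /* Added commentary: in compressed-row storage only the nonempty rows of the off-diagonal
       block are kept; compressedrow.nrows is their count, compressedrow.i holds their row starts,
       and compressedrow.rindex maps each kept row back to its local row number. */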
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++){
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* added in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
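/*
   Added commentary: the multiply routines above all exploit the standard MPIAIJ splitting
   y = A_d*x_local + A_o*x_ghost, where the diagonal block A_d is stored in a->A and the
   off-diagonal block A_o (with compacted column numbering) in a->B.  For the transpose the
   roles reverse: A_o^T*x is accumulated into the ghost vector a->lvec and then added into
   the owning processes' portion of the result by a reverse scatter.
*/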
/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
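/*
   Added commentary: the binary format written below is, in order, a four-entry header
   (MAT_FILE_CLASSID, global rows, global columns, total number of nonzeros), then all row
   lengths, then all global column indices, then all numerical values.  Process 0 gathers the
   pieces from the other ranks under PetscViewerFlowControl to keep its memory use bounded.
*/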
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* 0th processor needs space as large as the largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscBool inodes;

      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor */
*/ 1502 Mat A; 1503 Mat_SeqAIJ *Aloc; 1504 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1505 MatScalar *a; 1506 1507 if (mat->rmap->N > 1024) { 1508 PetscBool flg = PETSC_FALSE; 1509 1510 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1511 if (!flg) { 1512 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1513 } 1514 } 1515 1516 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1517 if (!rank) { 1518 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1519 } else { 1520 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1521 } 1522 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1523 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1524 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1525 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1526 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1527 1528 /* copy over the A part */ 1529 Aloc = (Mat_SeqAIJ*)aij->A->data; 1530 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1531 row = mat->rmap->rstart; 1532 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1533 for (i=0; i<m; i++) { 1534 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1535 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1536 } 1537 aj = Aloc->j; 1538 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1539 1540 /* copy over the B part */ 1541 Aloc = (Mat_SeqAIJ*)aij->B->data; 1542 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1543 row = mat->rmap->rstart; 1544 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1545 ct = cols; 1546 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1547 for (i=0; i<m; i++) { 1548 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1549 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1550 } 1551 ierr = PetscFree(ct);CHKERRQ(ierr); 1552 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1553 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1554 /* 1555 Everyone has to call to draw the matrix since the graphics waits are 1556 synchronized across all processors that share the PetscDraw object 1557 */ 1558 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1559 if (!rank) { 1560 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1561 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1562 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1563 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1564 } 1565 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1566 ierr = MatDestroy(&A);CHKERRQ(ierr); 1567 } 1568 PetscFunctionReturn(0); 1569 } 1570 1571 #undef __FUNCT__ 1572 #define __FUNCT__ "MatView_MPIAIJ" 1573 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1574 { 1575 PetscErrorCode ierr; 1576 PetscBool iascii,isdraw,issocket,isbinary; 1577 1578 PetscFunctionBegin; 1579 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1580 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1581 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1582 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1583 if (iascii || isdraw || isbinary || issocket) { 1584 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1585 } else { 1586 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1587 } 1588 PetscFunctionReturn(0); 1589 } 1590 1591 #undef __FUNCT__ 1592 #define __FUNCT__ "MatSOR_MPIAIJ" 1593 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1594 { 1595 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1596 PetscErrorCode ierr; 1597 Vec bb1 = 0; 1598 PetscBool hasop; 1599 1600 PetscFunctionBegin; 1601 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1602 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1603 } 1604 1605 if (flag == SOR_APPLY_UPPER) { 1606 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1607 PetscFunctionReturn(0); 1608 } 1609 1610 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1611 if (flag & SOR_ZERO_INITIAL_GUESS) { 1612 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1613 its--; 1614 } 1615 1616 while (its--) { 1617 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1618 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1619 1620 /* update rhs: bb1 = bb - B*x */ 1621 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1622 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1623 1624 /* local sweep */ 1625 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1626 } 1627 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1628 if (flag & SOR_ZERO_INITIAL_GUESS) { 1629 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1630 its--; 1631 } 1632 while (its--) { 1633 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1634 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1635 1636 /* update rhs: bb1 = bb - B*x */ 1637 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1638 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1639 1640 /* local sweep */ 1641 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1642 } 1643 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1644 if (flag & SOR_ZERO_INITIAL_GUESS) { 1645 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1646 its--; 1647 } 1648 while (its--) { 1649 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1650 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1651 1652 /* update rhs: bb1 = bb - B*x */ 1653 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1654 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1655 1656 /* local sweep */ 1657 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1658 } 1659 } else if (flag & SOR_EISENSTAT) { 1660 Vec xx1; 1661 1662 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1663 ierr = 
(*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1664 1665 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1666 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1667 if (!mat->diag) { 1668 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1669 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1670 } 1671 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1672 if (hasop) { 1673 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1674 } else { 1675 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1676 } 1677 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1678 1679 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1680 1681 /* local sweep */ 1682 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1683 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1684 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1685 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1686 1687 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1688 PetscFunctionReturn(0); 1689 } 1690 1691 #undef __FUNCT__ 1692 #define __FUNCT__ "MatPermute_MPIAIJ" 1693 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1694 { 1695 MPI_Comm comm; 1696 PetscInt first,local_rowsize,local_colsize; 1697 const PetscInt *rows; 1698 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1699 PetscErrorCode ierr; 1700 1701 PetscFunctionBegin; 1702 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1703 /* make a collective version of 'rowp', this is to be tolerant of users who pass serial index sets */ 1704 ierr = ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr); 1705 /* collect the global row permutation and invert it */ 1706 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1707 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1708 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 1709 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1710 ierr = ISDestroy(&growp);CHKERRQ(ierr); 1711 /* get the local target indices */ 1712 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1713 ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr); 1714 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1715 ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 1716 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1717 ierr = ISDestroy(&irowp);CHKERRQ(ierr); 1718 /* the column permutation is so much easier; 1719 make a local version of 'colp' and invert it */ 1720 ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr); 1721 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1722 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1723 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 1724 /* now we just get the submatrix */ 1725 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1726 /* clean up */ 1727 ierr = ISDestroy(&lrowp);CHKERRQ(ierr); 1728 ierr = ISDestroy(&icolp);CHKERRQ(ierr); 1729 PetscFunctionReturn(0); 1730 } 1731 1732 #undef __FUNCT__ 1733 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1734 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1735 { 1736 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1737 Mat A = mat->A,B = 
mat->B; 1738 PetscErrorCode ierr; 1739 PetscReal isend[5],irecv[5]; 1740 1741 PetscFunctionBegin; 1742 info->block_size = 1.0; 1743 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1744 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1745 isend[3] = info->memory; isend[4] = info->mallocs; 1746 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1747 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1748 isend[3] += info->memory; isend[4] += info->mallocs; 1749 if (flag == MAT_LOCAL) { 1750 info->nz_used = isend[0]; 1751 info->nz_allocated = isend[1]; 1752 info->nz_unneeded = isend[2]; 1753 info->memory = isend[3]; 1754 info->mallocs = isend[4]; 1755 } else if (flag == MAT_GLOBAL_MAX) { 1756 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1757 info->nz_used = irecv[0]; 1758 info->nz_allocated = irecv[1]; 1759 info->nz_unneeded = irecv[2]; 1760 info->memory = irecv[3]; 1761 info->mallocs = irecv[4]; 1762 } else if (flag == MAT_GLOBAL_SUM) { 1763 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1764 info->nz_used = irecv[0]; 1765 info->nz_allocated = irecv[1]; 1766 info->nz_unneeded = irecv[2]; 1767 info->memory = irecv[3]; 1768 info->mallocs = irecv[4]; 1769 } 1770 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1771 info->fill_ratio_needed = 0; 1772 info->factor_mallocs = 0; 1773 1774 PetscFunctionReturn(0); 1775 } 1776 1777 #undef __FUNCT__ 1778 #define __FUNCT__ "MatSetOption_MPIAIJ" 1779 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1780 { 1781 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1782 PetscErrorCode ierr; 1783 1784 PetscFunctionBegin; 1785 switch (op) { 1786 case MAT_NEW_NONZERO_LOCATIONS: 1787 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1788 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1789 case MAT_KEEP_NONZERO_PATTERN: 1790 case MAT_NEW_NONZERO_LOCATION_ERR: 1791 case MAT_USE_INODES: 1792 case MAT_IGNORE_ZERO_ENTRIES: 1793 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1794 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1795 break; 1796 case MAT_ROW_ORIENTED: 1797 a->roworiented = flg; 1798 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1799 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1800 break; 1801 case MAT_NEW_DIAGONALS: 1802 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1803 break; 1804 case MAT_IGNORE_OFF_PROC_ENTRIES: 1805 a->donotstash = flg; 1806 break; 1807 case MAT_SPD: 1808 A->spd_set = PETSC_TRUE; 1809 A->spd = flg; 1810 if (flg) { 1811 A->symmetric = PETSC_TRUE; 1812 A->structurally_symmetric = PETSC_TRUE; 1813 A->symmetric_set = PETSC_TRUE; 1814 A->structurally_symmetric_set = PETSC_TRUE; 1815 } 1816 break; 1817 case MAT_SYMMETRIC: 1818 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1819 break; 1820 case MAT_STRUCTURALLY_SYMMETRIC: 1821 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1822 break; 1823 case MAT_HERMITIAN: 1824 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1825 break; 1826 case MAT_SYMMETRY_ETERNAL: 1827 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1828 break; 1829 #if defined(PETSC_HAVE_TXPETSCGPU) 1830 // Sorry, I don't know how else to add MatOptions for data specific 1831 // to GPU classes without protecting against it in this case statement. 1832 // If anyone knows of a better way, let me know. 
Paul 1833 case MAT_DIAGBLOCK_CSR: 1834 case MAT_OFFDIAGBLOCK_CSR: 1835 case MAT_CSR: 1836 case MAT_DIAGBLOCK_DIA: 1837 case MAT_OFFDIAGBLOCK_DIA: 1838 case MAT_DIA: 1839 case MAT_DIAGBLOCK_ELL: 1840 case MAT_OFFDIAGBLOCK_ELL: 1841 case MAT_ELL: 1842 case MAT_DIAGBLOCK_HYB: 1843 case MAT_OFFDIAGBLOCK_HYB: 1844 case MAT_HYB: 1845 /* Not an error because MatSetOption_MPIAIJCUSP/CUSPARSE handle these options */ 1846 break; 1847 #endif 1848 default: 1849 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1850 } 1851 PetscFunctionReturn(0); 1852 } 1853 1854 #undef __FUNCT__ 1855 #define __FUNCT__ "MatGetRow_MPIAIJ" 1856 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1857 { 1858 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1859 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1860 PetscErrorCode ierr; 1861 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1862 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1863 PetscInt *cmap,*idx_p; 1864 1865 PetscFunctionBegin; 1866 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1867 mat->getrowactive = PETSC_TRUE; 1868 1869 if (!mat->rowvalues && (idx || v)) { 1870 /* 1871 allocate enough space to hold information from the longest row. 1872 */ 1873 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1874 PetscInt max = 1,tmp; 1875 for (i=0; i<matin->rmap->n; i++) { 1876 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1877 if (max < tmp) { max = tmp; } 1878 } 1879 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1880 } 1881 1882 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1883 lrow = row - rstart; 1884 1885 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1886 if (!v) {pvA = 0; pvB = 0;} 1887 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1888 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1889 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1890 nztot = nzA + nzB; 1891 1892 cmap = mat->garray; 1893 if (v || idx) { 1894 if (nztot) { 1895 /* Sort by increasing column numbers, assuming A and B already sorted */ 1896 PetscInt imark = -1; 1897 if (v) { 1898 *v = v_p = mat->rowvalues; 1899 for (i=0; i<nzB; i++) { 1900 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1901 else break; 1902 } 1903 imark = i; 1904 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1905 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1906 } 1907 if (idx) { 1908 *idx = idx_p = mat->rowindices; 1909 if (imark > -1) { 1910 for (i=0; i<imark; i++) { 1911 idx_p[i] = cmap[cworkB[i]]; 1912 } 1913 } else { 1914 for (i=0; i<nzB; i++) { 1915 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1916 else break; 1917 } 1918 imark = i; 1919 } 1920 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1921 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1922 } 1923 } else { 1924 if (idx) *idx = 0; 1925 if (v) *v = 0; 1926 } 1927 } 1928 *nz = nztot; 1929 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1930 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1931 PetscFunctionReturn(0); 1932 } 1933 1934 #undef __FUNCT__ 1935 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1936 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1937 { 1938 Mat_MPIAIJ 
*aij = (Mat_MPIAIJ*)mat->data; 1939 1940 PetscFunctionBegin; 1941 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1942 aij->getrowactive = PETSC_FALSE; 1943 PetscFunctionReturn(0); 1944 } 1945 1946 #undef __FUNCT__ 1947 #define __FUNCT__ "MatNorm_MPIAIJ" 1948 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1949 { 1950 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1951 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1952 PetscErrorCode ierr; 1953 PetscInt i,j,cstart = mat->cmap->rstart; 1954 PetscReal sum = 0.0; 1955 MatScalar *v; 1956 1957 PetscFunctionBegin; 1958 if (aij->size == 1) { 1959 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1960 } else { 1961 if (type == NORM_FROBENIUS) { 1962 v = amat->a; 1963 for (i=0; i<amat->nz; i++) { 1964 #if defined(PETSC_USE_COMPLEX) 1965 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1966 #else 1967 sum += (*v)*(*v); v++; 1968 #endif 1969 } 1970 v = bmat->a; 1971 for (i=0; i<bmat->nz; i++) { 1972 #if defined(PETSC_USE_COMPLEX) 1973 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1974 #else 1975 sum += (*v)*(*v); v++; 1976 #endif 1977 } 1978 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1979 *norm = PetscSqrtReal(*norm); 1980 } else if (type == NORM_1) { /* max column norm */ 1981 PetscReal *tmp,*tmp2; 1982 PetscInt *jj,*garray = aij->garray; 1983 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1984 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1985 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1986 *norm = 0.0; 1987 v = amat->a; jj = amat->j; 1988 for (j=0; j<amat->nz; j++) { 1989 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1990 } 1991 v = bmat->a; jj = bmat->j; 1992 for (j=0; j<bmat->nz; j++) { 1993 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1994 } 1995 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1996 for (j=0; j<mat->cmap->N; j++) { 1997 if (tmp2[j] > *norm) *norm = tmp2[j]; 1998 } 1999 ierr = PetscFree(tmp);CHKERRQ(ierr); 2000 ierr = PetscFree(tmp2);CHKERRQ(ierr); 2001 } else if (type == NORM_INFINITY) { /* max row norm */ 2002 PetscReal ntemp = 0.0; 2003 for (j=0; j<aij->A->rmap->n; j++) { 2004 v = amat->a + amat->i[j]; 2005 sum = 0.0; 2006 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2007 sum += PetscAbsScalar(*v); v++; 2008 } 2009 v = bmat->a + bmat->i[j]; 2010 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2011 sum += PetscAbsScalar(*v); v++; 2012 } 2013 if (sum > ntemp) ntemp = sum; 2014 } 2015 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 2016 } else { 2017 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 2018 } 2019 } 2020 PetscFunctionReturn(0); 2021 } 2022 2023 #undef __FUNCT__ 2024 #define __FUNCT__ "MatTranspose_MPIAIJ" 2025 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2026 { 2027 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2028 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2029 PetscErrorCode ierr; 2030 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 2031 PetscInt cstart=A->cmap->rstart,ncol; 2032 Mat B; 2033 MatScalar *array; 2034 2035 PetscFunctionBegin; 2036 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square 
matrix only for in-place"); 2037 2038 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 2039 ai = Aloc->i; aj = Aloc->j; 2040 bi = Bloc->i; bj = Bloc->j; 2041 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2042 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 2043 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2044 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 2045 for (i=0; i<ai[ma]; i++){ 2046 d_nnz[aj[i]] ++; 2047 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2048 } 2049 2050 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 2051 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2052 ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs); CHKERRQ(ierr); 2053 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2054 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 2055 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2056 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2057 } else { 2058 B = *matout; 2059 } 2060 2061 /* copy over the A part */ 2062 array = Aloc->a; 2063 row = A->rmap->rstart; 2064 for (i=0; i<ma; i++) { 2065 ncol = ai[i+1]-ai[i]; 2066 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2067 row++; array += ncol; aj += ncol; 2068 } 2069 aj = Aloc->j; 2070 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2071 2072 /* copy over the B part */ 2073 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2074 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2075 array = Bloc->a; 2076 row = A->rmap->rstart; 2077 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 2078 cols_tmp = cols; 2079 for (i=0; i<mb; i++) { 2080 ncol = bi[i+1]-bi[i]; 2081 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2082 row++; array += ncol; cols_tmp += ncol; 2083 } 2084 ierr = PetscFree(cols);CHKERRQ(ierr); 2085 2086 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2087 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2088 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2089 *matout = B; 2090 } else { 2091 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2092 } 2093 PetscFunctionReturn(0); 2094 } 2095 2096 #undef __FUNCT__ 2097 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2098 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2099 { 2100 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2101 Mat a = aij->A,b = aij->B; 2102 PetscErrorCode ierr; 2103 PetscInt s1,s2,s3; 2104 2105 PetscFunctionBegin; 2106 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2107 if (rr) { 2108 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2109 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2110 /* Overlap communication with computation. 
*/ 2111 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2112 } 2113 if (ll) { 2114 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2115 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2116 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2117 } 2118 /* scale the diagonal block */ 2119 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2120 2121 if (rr) { 2122 /* Do a scatter end and then right scale the off-diagonal block */ 2123 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2124 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2125 } 2126 2127 PetscFunctionReturn(0); 2128 } 2129 2130 #undef __FUNCT__ 2131 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2132 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2133 { 2134 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2135 PetscErrorCode ierr; 2136 2137 PetscFunctionBegin; 2138 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2139 PetscFunctionReturn(0); 2140 } 2141 2142 #undef __FUNCT__ 2143 #define __FUNCT__ "MatEqual_MPIAIJ" 2144 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2145 { 2146 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2147 Mat a,b,c,d; 2148 PetscBool flg; 2149 PetscErrorCode ierr; 2150 2151 PetscFunctionBegin; 2152 a = matA->A; b = matA->B; 2153 c = matB->A; d = matB->B; 2154 2155 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2156 if (flg) { 2157 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2158 } 2159 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2160 PetscFunctionReturn(0); 2161 } 2162 2163 #undef __FUNCT__ 2164 #define __FUNCT__ "MatCopy_MPIAIJ" 2165 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2166 { 2167 PetscErrorCode ierr; 2168 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2169 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2170 2171 PetscFunctionBegin; 2172 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2173 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2174 /* because of the column compression in the off-processor part of the matrix a->B, 2175 the number of columns in a->B and b->B may be different, hence we cannot call 2176 the MatCopy() directly on the two parts. 
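      (The off-diagonal block a->B stores only those global columns that actually contain nonzeros on
      this process, renumbered compactly through garray, so matrices with the same global nonzero
      pattern can still have differently sized local column spaces in their B parts.)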
If need be, we can provide a more 2177 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2178 then copying the submatrices */ 2179 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2180 } else { 2181 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2182 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2183 } 2184 PetscFunctionReturn(0); 2185 } 2186 2187 #undef __FUNCT__ 2188 #define __FUNCT__ "MatSetUp_MPIAIJ" 2189 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2190 { 2191 PetscErrorCode ierr; 2192 2193 PetscFunctionBegin; 2194 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2195 PetscFunctionReturn(0); 2196 } 2197 2198 #undef __FUNCT__ 2199 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2200 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2201 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt* nnz) 2202 { 2203 PetscInt i,m=Y->rmap->N; 2204 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2205 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2206 const PetscInt *xi = x->i,*yi = y->i; 2207 2208 PetscFunctionBegin; 2209 /* Set the number of nonzeros in the new matrix */ 2210 for(i=0; i<m; i++) { 2211 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2212 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2213 nnz[i] = 0; 2214 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2215 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2216 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2217 nnz[i]++; 2218 } 2219 for (; k<nzy; k++) nnz[i]++; 2220 } 2221 PetscFunctionReturn(0); 2222 } 2223 2224 #undef __FUNCT__ 2225 #define __FUNCT__ "MatAXPY_MPIAIJ" 2226 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2227 { 2228 PetscErrorCode ierr; 2229 PetscInt i; 2230 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 2231 PetscBLASInt bnz,one=1; 2232 Mat_SeqAIJ *x,*y; 2233 2234 PetscFunctionBegin; 2235 if (str == SAME_NONZERO_PATTERN) { 2236 PetscScalar alpha = a; 2237 x = (Mat_SeqAIJ *)xx->A->data; 2238 y = (Mat_SeqAIJ *)yy->A->data; 2239 bnz = PetscBLASIntCast(x->nz); 2240 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2241 x = (Mat_SeqAIJ *)xx->B->data; 2242 y = (Mat_SeqAIJ *)yy->B->data; 2243 bnz = PetscBLASIntCast(x->nz); 2244 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2245 } else if (str == SUBSET_NONZERO_PATTERN) { 2246 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2247 2248 x = (Mat_SeqAIJ *)xx->B->data; 2249 y = (Mat_SeqAIJ *)yy->B->data; 2250 if (y->xtoy && y->XtoY != xx->B) { 2251 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2252 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2253 } 2254 if (!y->xtoy) { /* get xtoy */ 2255 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2256 y->XtoY = xx->B; 2257 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2258 } 2259 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2260 } else { 2261 Mat B; 2262 PetscInt *nnz_d,*nnz_o; 2263 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2264 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2265 ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr); 2266 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2267 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2268 ierr = 
MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr); 2269 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2270 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2271 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2272 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2273 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2274 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2275 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2276 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2277 } 2278 PetscFunctionReturn(0); 2279 } 2280 2281 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2282 2283 #undef __FUNCT__ 2284 #define __FUNCT__ "MatConjugate_MPIAIJ" 2285 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2286 { 2287 #if defined(PETSC_USE_COMPLEX) 2288 PetscErrorCode ierr; 2289 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2290 2291 PetscFunctionBegin; 2292 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2293 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2294 #else 2295 PetscFunctionBegin; 2296 #endif 2297 PetscFunctionReturn(0); 2298 } 2299 2300 #undef __FUNCT__ 2301 #define __FUNCT__ "MatRealPart_MPIAIJ" 2302 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2303 { 2304 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2305 PetscErrorCode ierr; 2306 2307 PetscFunctionBegin; 2308 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2309 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2310 PetscFunctionReturn(0); 2311 } 2312 2313 #undef __FUNCT__ 2314 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2315 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2316 { 2317 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2318 PetscErrorCode ierr; 2319 2320 PetscFunctionBegin; 2321 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2322 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2323 PetscFunctionReturn(0); 2324 } 2325 2326 #ifdef PETSC_HAVE_PBGL 2327 2328 #include <boost/parallel/mpi/bsp_process_group.hpp> 2329 #include <boost/graph/distributed/ilu_default_graph.hpp> 2330 #include <boost/graph/distributed/ilu_0_block.hpp> 2331 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2332 #include <boost/graph/distributed/petsc/interface.hpp> 2333 #include <boost/multi_array.hpp> 2334 #include <boost/parallel/distributed_property_map.hpp> 2335 2336 #undef __FUNCT__ 2337 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2338 /* 2339 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2340 */ 2341 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2342 { 2343 namespace petsc = boost::distributed::petsc; 2344 2345 namespace graph_dist = boost::graph::distributed; 2346 using boost::graph::distributed::ilu_default::process_group_type; 2347 using boost::graph::ilu_permuted; 2348 2349 PetscBool row_identity, col_identity; 2350 PetscContainer c; 2351 PetscInt m, n, M, N; 2352 PetscErrorCode ierr; 2353 2354 PetscFunctionBegin; 2355 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2356 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2357 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2358 if (!row_identity || !col_identity) { 2359 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2360 } 2361 2362 process_group_type pg; 2363 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2364 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg,
petsc::matrix_distribution(A, pg)); 2365 lgraph_type& level_graph = *lgraph_p; 2366 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2367 2368 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2369 ilu_permuted(level_graph); 2370 2371 /* put together the new matrix */ 2372 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 2373 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2374 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2375 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2376 ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs); CHKERRQ(ierr); 2377 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2378 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2379 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2380 2381 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 2382 ierr = PetscContainerSetPointer(c, lgraph_p); 2383 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2384 ierr = PetscContainerDestroy(&c); 2385 PetscFunctionReturn(0); 2386 } 2387 2388 #undef __FUNCT__ 2389 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2390 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2391 { 2392 PetscFunctionBegin; 2393 PetscFunctionReturn(0); 2394 } 2395 2396 #undef __FUNCT__ 2397 #define __FUNCT__ "MatSolve_MPIAIJ" 2398 /* 2399 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2400 */ 2401 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2402 { 2403 namespace graph_dist = boost::graph::distributed; 2404 2405 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2406 lgraph_type* lgraph_p; 2407 PetscContainer c; 2408 PetscErrorCode ierr; 2409 2410 PetscFunctionBegin; 2411 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 2412 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 2413 ierr = VecCopy(b, x);CHKERRQ(ierr); 2414 2415 PetscScalar* array_x; 2416 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2417 PetscInt sx; 2418 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2419 2420 PetscScalar* array_b; 2421 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2422 PetscInt sb; 2423 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2424 2425 lgraph_type& level_graph = *lgraph_p; 2426 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2427 2428 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2429 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2430 ref_x(array_x, boost::extents[num_vertices(graph)]); 2431 2432 typedef boost::iterator_property_map<array_ref_type::iterator, 2433 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2434 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2435 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2436 2437 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2438 2439 PetscFunctionReturn(0); 2440 } 2441 #endif 2442 2443 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2444 PetscInt nzlocal,nsends,nrecvs; 2445 PetscMPIInt *send_rank,*recv_rank; 2446 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2447 PetscScalar *sbuf_a,**rbuf_a; 2448 PetscErrorCode (*Destroy)(Mat); 2449 } Mat_Redundant; 2450 2451 #undef __FUNCT__ 2452 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2453 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2454 { 2455 PetscErrorCode ierr; 2456 Mat_Redundant 
*redund=(Mat_Redundant*)ptr; 2457 PetscInt i; 2458 2459 PetscFunctionBegin; 2460 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2461 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2462 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2463 for (i=0; i<redund->nrecvs; i++){ 2464 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2465 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2466 } 2467 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2468 ierr = PetscFree(redund);CHKERRQ(ierr); 2469 PetscFunctionReturn(0); 2470 } 2471 2472 #undef __FUNCT__ 2473 #define __FUNCT__ "MatDestroy_MatRedundant" 2474 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2475 { 2476 PetscErrorCode ierr; 2477 PetscContainer container; 2478 Mat_Redundant *redund=PETSC_NULL; 2479 2480 PetscFunctionBegin; 2481 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2482 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2483 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2484 A->ops->destroy = redund->Destroy; 2485 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2486 if (A->ops->destroy) { 2487 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2488 } 2489 PetscFunctionReturn(0); 2490 } 2491 2492 #undef __FUNCT__ 2493 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2494 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2495 { 2496 PetscMPIInt rank,size; 2497 MPI_Comm comm=((PetscObject)mat)->comm; 2498 PetscErrorCode ierr; 2499 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2500 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2501 PetscInt *rowrange=mat->rmap->range; 2502 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2503 Mat A=aij->A,B=aij->B,C=*matredundant; 2504 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2505 PetscScalar *sbuf_a; 2506 PetscInt nzlocal=a->nz+b->nz; 2507 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2508 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2509 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2510 MatScalar *aworkA,*aworkB; 2511 PetscScalar *vals; 2512 PetscMPIInt tag1,tag2,tag3,imdex; 2513 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2514 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2515 MPI_Status recv_status,*send_status; 2516 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2517 PetscInt **rbuf_j=PETSC_NULL; 2518 PetscScalar **rbuf_a=PETSC_NULL; 2519 Mat_Redundant *redund=PETSC_NULL; 2520 PetscContainer container; 2521 2522 PetscFunctionBegin; 2523 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2524 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2525 2526 if (reuse == MAT_REUSE_MATRIX) { 2527 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2528 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2529 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2530 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong local size"); 2531 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2532 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2533 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2534 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2535 2536 nsends = redund->nsends; 2537 nrecvs = redund->nrecvs; 2538 send_rank = redund->send_rank; 2539 recv_rank = redund->recv_rank; 2540 sbuf_nz = redund->sbuf_nz; 2541 rbuf_nz = redund->rbuf_nz; 2542 sbuf_j = redund->sbuf_j; 2543 sbuf_a = redund->sbuf_a; 2544 rbuf_j = redund->rbuf_j; 2545 rbuf_a = redund->rbuf_a; 2546 } 2547 2548 if (reuse == MAT_INITIAL_MATRIX){ 2549 PetscMPIInt subrank,subsize; 2550 PetscInt nleftover,np_subcomm; 2551 /* get the destination processors' id send_rank, nsends and nrecvs */ 2552 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2553 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2554 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2555 np_subcomm = size/nsubcomm; 2556 nleftover = size - nsubcomm*np_subcomm; 2557 nsends = 0; nrecvs = 0; 2558 for (i=0; i<size; i++){ /* i=rank*/ 2559 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2560 send_rank[nsends] = i; nsends++; 2561 recv_rank[nrecvs++] = i; 2562 } 2563 } 2564 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2565 i = size-nleftover-1; 2566 j = 0; 2567 while (j < nsubcomm - nleftover){ 2568 send_rank[nsends++] = i; 2569 i--; j++; 2570 } 2571 } 2572 2573 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2574 for (i=0; i<nleftover; i++){ 2575 recv_rank[nrecvs++] = size-nleftover+i; 2576 } 2577 } 2578 2579 /* allocate sbuf_j, sbuf_a */ 2580 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2581 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2582 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2583 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2584 2585 /* copy mat's local entries into the buffers */ 2586 if (reuse == MAT_INITIAL_MATRIX){ 2587 rownz_max = 0; 2588 rptr = sbuf_j; 2589 cols = sbuf_j + rend-rstart + 1; 2590 vals = sbuf_a; 2591 rptr[0] = 0; 2592 for (i=0; i<rend-rstart; i++){ 2593 row = i + rstart; 2594 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2595 ncols = nzA + nzB; 2596 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2597 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2598 /* load the column indices for this row into cols */ 2599 lwrite = 0; 2600 for (l=0; l<nzB; l++) { 2601 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2602 vals[lwrite] = aworkB[l]; 2603 cols[lwrite++] = ctmp; 2604 } 2605 } 2606 for (l=0; l<nzA; l++){ 2607 vals[lwrite] = aworkA[l]; 2608 cols[lwrite++] = cstart + cworkA[l]; 2609 } 2610 for (l=0; l<nzB; l++) { 2611 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2612 vals[lwrite] = aworkB[l]; 2613 cols[lwrite++] = ctmp; 2614 } 2615 } 2616 vals += ncols; 2617 cols += ncols; 2618 rptr[i+1] = rptr[i] + ncols; 2619 if (rownz_max < ncols) rownz_max = ncols; 2620 } 2621 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2622 } else { /* only copy matrix values into sbuf_a */ 2623 rptr = sbuf_j; 2624 vals = sbuf_a; 2625 rptr[0] = 0; 2626 for (i=0; i<rend-rstart; i++){ 2627 row = i + 
rstart; 2628 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2629 ncols = nzA + nzB; 2630 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2631 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2632 lwrite = 0; 2633 for (l=0; l<nzB; l++) { 2634 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2635 } 2636 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2637 for (l=0; l<nzB; l++) { 2638 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2639 } 2640 vals += ncols; 2641 rptr[i+1] = rptr[i] + ncols; 2642 } 2643 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2644 2645 /* send nzlocal to others, and recv other's nzlocal */ 2646 /*--------------------------------------------------*/ 2647 if (reuse == MAT_INITIAL_MATRIX){ 2648 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2649 s_waits2 = s_waits3 + nsends; 2650 s_waits1 = s_waits2 + nsends; 2651 r_waits1 = s_waits1 + nsends; 2652 r_waits2 = r_waits1 + nrecvs; 2653 r_waits3 = r_waits2 + nrecvs; 2654 } else { 2655 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2656 r_waits3 = s_waits3 + nsends; 2657 } 2658 2659 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2660 if (reuse == MAT_INITIAL_MATRIX){ 2661 /* get new tags to keep the communication clean */ 2662 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2663 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2664 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2665 2666 /* post receives of other's nzlocal */ 2667 for (i=0; i<nrecvs; i++){ 2668 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2669 } 2670 /* send nzlocal to others */ 2671 for (i=0; i<nsends; i++){ 2672 sbuf_nz[i] = nzlocal; 2673 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2674 } 2675 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2676 count = nrecvs; 2677 while (count) { 2678 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2679 recv_rank[imdex] = recv_status.MPI_SOURCE; 2680 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2681 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2682 2683 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2684 rbuf_nz[imdex] += i + 2; 2685 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2686 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2687 count--; 2688 } 2689 /* wait on sends of nzlocal */ 2690 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2691 /* send mat->i,j to others, and recv from other's */ 2692 /*------------------------------------------------*/ 2693 for (i=0; i<nsends; i++){ 2694 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2695 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2696 } 2697 /* wait on receives of mat->i,j */ 2698 /*------------------------------*/ 2699 count = nrecvs; 2700 while (count) { 2701 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2702 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank 
%d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2703 count--; 2704 } 2705 /* wait on sends of mat->i,j */ 2706 /*---------------------------*/ 2707 if (nsends) { 2708 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2709 } 2710 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2711 2712 /* post receives, send and receive mat->a */ 2713 /*----------------------------------------*/ 2714 for (imdex=0; imdex<nrecvs; imdex++) { 2715 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2716 } 2717 for (i=0; i<nsends; i++){ 2718 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2719 } 2720 count = nrecvs; 2721 while (count) { 2722 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2723 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2724 count--; 2725 } 2726 if (nsends) { 2727 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2728 } 2729 2730 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2731 2732 /* create redundant matrix */ 2733 /*-------------------------*/ 2734 if (reuse == MAT_INITIAL_MATRIX){ 2735 /* compute rownz_max for preallocation */ 2736 for (imdex=0; imdex<nrecvs; imdex++){ 2737 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2738 rptr = rbuf_j[imdex]; 2739 for (i=0; i<j; i++){ 2740 ncols = rptr[i+1] - rptr[i]; 2741 if (rownz_max < ncols) rownz_max = ncols; 2742 } 2743 } 2744 2745 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2746 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2747 ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs); CHKERRQ(ierr); 2748 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2749 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2750 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2751 } else { 2752 C = *matredundant; 2753 } 2754 2755 /* insert local matrix entries */ 2756 rptr = sbuf_j; 2757 cols = sbuf_j + rend-rstart + 1; 2758 vals = sbuf_a; 2759 for (i=0; i<rend-rstart; i++){ 2760 row = i + rstart; 2761 ncols = rptr[i+1] - rptr[i]; 2762 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2763 vals += ncols; 2764 cols += ncols; 2765 } 2766 /* insert received matrix entries */ 2767 for (imdex=0; imdex<nrecvs; imdex++){ 2768 rstart = rowrange[recv_rank[imdex]]; 2769 rend = rowrange[recv_rank[imdex]+1]; 2770 rptr = rbuf_j[imdex]; 2771 cols = rbuf_j[imdex] + rend-rstart + 1; 2772 vals = rbuf_a[imdex]; 2773 for (i=0; i<rend-rstart; i++){ 2774 row = i + rstart; 2775 ncols = rptr[i+1] - rptr[i]; 2776 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2777 vals += ncols; 2778 cols += ncols; 2779 } 2780 } 2781 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2782 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2783 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2784 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2785 if (reuse == MAT_INITIAL_MATRIX) { 2786 PetscContainer container; 2787 *matredundant = C; 2788 /* create a supporting struct and attach it to C for reuse */ 2789 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2790 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 
2791 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2792 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2793 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2794 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 2795 2796 redund->nzlocal = nzlocal; 2797 redund->nsends = nsends; 2798 redund->nrecvs = nrecvs; 2799 redund->send_rank = send_rank; 2800 redund->recv_rank = recv_rank; 2801 redund->sbuf_nz = sbuf_nz; 2802 redund->rbuf_nz = rbuf_nz; 2803 redund->sbuf_j = sbuf_j; 2804 redund->sbuf_a = sbuf_a; 2805 redund->rbuf_j = rbuf_j; 2806 redund->rbuf_a = rbuf_a; 2807 2808 redund->Destroy = C->ops->destroy; 2809 C->ops->destroy = MatDestroy_MatRedundant; 2810 } 2811 PetscFunctionReturn(0); 2812 } 2813 2814 #undef __FUNCT__ 2815 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2816 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2817 { 2818 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2819 PetscErrorCode ierr; 2820 PetscInt i,*idxb = 0; 2821 PetscScalar *va,*vb; 2822 Vec vtmp; 2823 2824 PetscFunctionBegin; 2825 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2826 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2827 if (idx) { 2828 for (i=0; i<A->rmap->n; i++) { 2829 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2830 } 2831 } 2832 2833 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2834 if (idx) { 2835 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2836 } 2837 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2838 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2839 2840 for (i=0; i<A->rmap->n; i++){ 2841 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2842 va[i] = vb[i]; 2843 if (idx) idx[i] = a->garray[idxb[i]]; 2844 } 2845 } 2846 2847 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2848 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2849 ierr = PetscFree(idxb);CHKERRQ(ierr); 2850 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2851 PetscFunctionReturn(0); 2852 } 2853 2854 #undef __FUNCT__ 2855 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2856 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2857 { 2858 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2859 PetscErrorCode ierr; 2860 PetscInt i,*idxb = 0; 2861 PetscScalar *va,*vb; 2862 Vec vtmp; 2863 2864 PetscFunctionBegin; 2865 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2866 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2867 if (idx) { 2868 for (i=0; i<A->rmap->n; i++) { /* loop over the local rows, matching MatGetRowMaxAbs_MPIAIJ() above */ 2869 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2870 } 2871 } 2872 2873 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2874 if (idx) { 2875 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2876 } 2877 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2878 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2879 2880 for (i=0; i<A->rmap->n; i++){ 2881 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2882 va[i] = vb[i]; 2883 if (idx) idx[i] = a->garray[idxb[i]]; 2884 } 2885 } 2886 2887 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2888 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2889 ierr = PetscFree(idxb);CHKERRQ(ierr); 2890 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2891 PetscFunctionReturn(0); 2892 } 2893 2894 #undef __FUNCT__ 2895 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2896 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2897 { 2898 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2899 PetscInt n = A->rmap->n; 2900 PetscInt cstart =
A->cmap->rstart; 2901 PetscInt *cmap = mat->garray; 2902 PetscInt *diagIdx, *offdiagIdx; 2903 Vec diagV, offdiagV; 2904 PetscScalar *a, *diagA, *offdiagA; 2905 PetscInt r; 2906 PetscErrorCode ierr; 2907 2908 PetscFunctionBegin; 2909 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2910 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2911 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2912 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2913 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2914 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2915 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2916 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2917 for(r = 0; r < n; ++r) { 2918 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2919 a[r] = diagA[r]; 2920 idx[r] = cstart + diagIdx[r]; 2921 } else { 2922 a[r] = offdiagA[r]; 2923 idx[r] = cmap[offdiagIdx[r]]; 2924 } 2925 } 2926 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2927 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2928 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2929 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2930 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2931 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2932 PetscFunctionReturn(0); 2933 } 2934 2935 #undef __FUNCT__ 2936 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2937 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2938 { 2939 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2940 PetscInt n = A->rmap->n; 2941 PetscInt cstart = A->cmap->rstart; 2942 PetscInt *cmap = mat->garray; 2943 PetscInt *diagIdx, *offdiagIdx; 2944 Vec diagV, offdiagV; 2945 PetscScalar *a, *diagA, *offdiagA; 2946 PetscInt r; 2947 PetscErrorCode ierr; 2948 2949 PetscFunctionBegin; 2950 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2951 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2952 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2953 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2954 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2955 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2956 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2957 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2958 for(r = 0; r < n; ++r) { 2959 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2960 a[r] = diagA[r]; 2961 idx[r] = cstart + diagIdx[r]; 2962 } else { 2963 a[r] = offdiagA[r]; 2964 idx[r] = cmap[offdiagIdx[r]]; 2965 } 2966 } 2967 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2968 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2969 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2970 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2971 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2972 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2973 PetscFunctionReturn(0); 2974 } 2975 2976 #undef __FUNCT__ 2977 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 2978 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2979 { 2980 PetscErrorCode ierr; 2981 Mat *dummy; 2982 2983 PetscFunctionBegin; 2984 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2985 *newmat = *dummy; 2986 ierr = PetscFree(dummy);CHKERRQ(ierr); 2987 PetscFunctionReturn(0); 2988 } 2989 2990 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2991 2992 #undef __FUNCT__
2993 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 2994 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,PetscScalar **values) 2995 { 2996 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2997 PetscErrorCode ierr; 2998 2999 PetscFunctionBegin; 3000 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3001 PetscFunctionReturn(0); 3002 } 3003 3004 3005 /* -------------------------------------------------------------------*/ 3006 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3007 MatGetRow_MPIAIJ, 3008 MatRestoreRow_MPIAIJ, 3009 MatMult_MPIAIJ, 3010 /* 4*/ MatMultAdd_MPIAIJ, 3011 MatMultTranspose_MPIAIJ, 3012 MatMultTransposeAdd_MPIAIJ, 3013 #ifdef PETSC_HAVE_PBGL 3014 MatSolve_MPIAIJ, 3015 #else 3016 0, 3017 #endif 3018 0, 3019 0, 3020 /*10*/ 0, 3021 0, 3022 0, 3023 MatSOR_MPIAIJ, 3024 MatTranspose_MPIAIJ, 3025 /*15*/ MatGetInfo_MPIAIJ, 3026 MatEqual_MPIAIJ, 3027 MatGetDiagonal_MPIAIJ, 3028 MatDiagonalScale_MPIAIJ, 3029 MatNorm_MPIAIJ, 3030 /*20*/ MatAssemblyBegin_MPIAIJ, 3031 MatAssemblyEnd_MPIAIJ, 3032 MatSetOption_MPIAIJ, 3033 MatZeroEntries_MPIAIJ, 3034 /*24*/ MatZeroRows_MPIAIJ, 3035 0, 3036 #ifdef PETSC_HAVE_PBGL 3037 0, 3038 #else 3039 0, 3040 #endif 3041 0, 3042 0, 3043 /*29*/ MatSetUp_MPIAIJ, 3044 #ifdef PETSC_HAVE_PBGL 3045 0, 3046 #else 3047 0, 3048 #endif 3049 0, 3050 0, 3051 0, 3052 /*34*/ MatDuplicate_MPIAIJ, 3053 0, 3054 0, 3055 0, 3056 0, 3057 /*39*/ MatAXPY_MPIAIJ, 3058 MatGetSubMatrices_MPIAIJ, 3059 MatIncreaseOverlap_MPIAIJ, 3060 MatGetValues_MPIAIJ, 3061 MatCopy_MPIAIJ, 3062 /*44*/ MatGetRowMax_MPIAIJ, 3063 MatScale_MPIAIJ, 3064 0, 3065 0, 3066 MatZeroRowsColumns_MPIAIJ, 3067 /*49*/ 0, 3068 0, 3069 0, 3070 0, 3071 0, 3072 /*54*/ MatFDColoringCreate_MPIAIJ, 3073 0, 3074 MatSetUnfactored_MPIAIJ, 3075 MatPermute_MPIAIJ, 3076 0, 3077 /*59*/ MatGetSubMatrix_MPIAIJ, 3078 MatDestroy_MPIAIJ, 3079 MatView_MPIAIJ, 3080 0, 3081 0, 3082 /*64*/ 0, 3083 0, 3084 0, 3085 0, 3086 0, 3087 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3088 MatGetRowMinAbs_MPIAIJ, 3089 0, 3090 MatSetColoring_MPIAIJ, 3091 #if defined(PETSC_HAVE_ADIC) 3092 MatSetValuesAdic_MPIAIJ, 3093 #else 3094 0, 3095 #endif 3096 MatSetValuesAdifor_MPIAIJ, 3097 /*75*/ MatFDColoringApply_AIJ, 3098 0, 3099 0, 3100 0, 3101 0, 3102 /*80*/ 0, 3103 0, 3104 0, 3105 /*83*/ MatLoad_MPIAIJ, 3106 0, 3107 0, 3108 0, 3109 0, 3110 0, 3111 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3112 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3113 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3114 MatPtAP_Basic, 3115 MatPtAPSymbolic_MPIAIJ, 3116 /*94*/ MatPtAPNumeric_MPIAIJ, 3117 0, 3118 0, 3119 0, 3120 0, 3121 /*99*/ 0, 3122 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3123 MatPtAPNumeric_MPIAIJ_MPIAIJ, 3124 MatConjugate_MPIAIJ, 3125 0, 3126 /*104*/MatSetValuesRow_MPIAIJ, 3127 MatRealPart_MPIAIJ, 3128 MatImaginaryPart_MPIAIJ, 3129 0, 3130 0, 3131 /*109*/0, 3132 MatGetRedundantMatrix_MPIAIJ, 3133 MatGetRowMin_MPIAIJ, 3134 0, 3135 0, 3136 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3137 0, 3138 0, 3139 0, 3140 0, 3141 /*119*/0, 3142 0, 3143 0, 3144 0, 3145 MatGetMultiProcBlock_MPIAIJ, 3146 /*124*/MatFindNonZeroRows_MPIAIJ, 3147 MatGetColumnNorms_MPIAIJ, 3148 MatInvertBlockDiagonal_MPIAIJ, 3149 0, 3150 MatGetSubMatricesParallel_MPIAIJ, 3151 /*129*/0, 3152 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3153 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3154 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3155 0, 3156 /*134*/0, 3157 0, 3158 0, 3159 0, 3160 0 3161 }; 3162 3163 /* ----------------------------------------------------------------------------------------*/ 3164 3165 EXTERN_C_BEGIN 3166 #undef __FUNCT__ 3167 
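/*
   MatStoreValues_MPIAIJ()/MatRetrieveValues_MPIAIJ() below simply forward to the diagonal and
   off-diagonal SeqAIJ blocks.  A hedged usage sketch of the public interface, for the common case
   where the nonzero pattern is fixed and only the numerical values are rebuilt repeatedly
   (MatStoreValues() requires that no new nonzero locations be generated afterwards):

      ierr = MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
      ierr = MatStoreValues(mat);CHKERRQ(ierr);
         ... modify the values with MatSetValues() and reassemble ...
      ierr = MatRetrieveValues(mat);CHKERRQ(ierr);
*/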
#define __FUNCT__ "MatStoreValues_MPIAIJ" 3168 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3169 { 3170 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3171 PetscErrorCode ierr; 3172 3173 PetscFunctionBegin; 3174 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3175 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3176 PetscFunctionReturn(0); 3177 } 3178 EXTERN_C_END 3179 3180 EXTERN_C_BEGIN 3181 #undef __FUNCT__ 3182 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3183 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3184 { 3185 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3186 PetscErrorCode ierr; 3187 3188 PetscFunctionBegin; 3189 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3190 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3191 PetscFunctionReturn(0); 3192 } 3193 EXTERN_C_END 3194 3195 EXTERN_C_BEGIN 3196 #undef __FUNCT__ 3197 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3198 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3199 { 3200 Mat_MPIAIJ *b; 3201 PetscErrorCode ierr; 3202 PetscInt i; 3203 PetscBool d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE; 3204 3205 PetscFunctionBegin; 3206 if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE; 3207 if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE; 3208 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3209 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3210 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3211 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3212 3213 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3214 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3215 if (d_nnz) { 3216 for (i=0; i<B->rmap->n; i++) { 3217 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 3218 } 3219 } 3220 if (o_nnz) { 3221 for (i=0; i<B->rmap->n; i++) { 3222 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 3223 } 3224 } 3225 b = (Mat_MPIAIJ*)B->data; 3226 3227 if (!B->preallocated) { 3228 /* Explicitly create 2 MATSEQAIJ matrices. */ 3229 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3230 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3231 ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3232 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3233 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3234 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3235 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3236 ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3237 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3238 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3239 } 3240 3241 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3242 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3243 /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). 
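      If accurate preallocation is supplied later, the caller can restore the stricter behaviour
      explicitly, e.g. (sketch only) MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);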
*/ 3244 if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3245 if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3246 B->preallocated = PETSC_TRUE; 3247 PetscFunctionReturn(0); 3248 } 3249 EXTERN_C_END 3250 3251 #undef __FUNCT__ 3252 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3253 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3254 { 3255 Mat mat; 3256 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3257 PetscErrorCode ierr; 3258 3259 PetscFunctionBegin; 3260 *newmat = 0; 3261 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3262 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3263 ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr); 3264 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3265 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3266 a = (Mat_MPIAIJ*)mat->data; 3267 3268 mat->factortype = matin->factortype; 3269 mat->rmap->bs = matin->rmap->bs; 3270 mat->cmap->bs = matin->cmap->bs; 3271 mat->assembled = PETSC_TRUE; 3272 mat->insertmode = NOT_SET_VALUES; 3273 mat->preallocated = PETSC_TRUE; 3274 3275 a->size = oldmat->size; 3276 a->rank = oldmat->rank; 3277 a->donotstash = oldmat->donotstash; 3278 a->roworiented = oldmat->roworiented; 3279 a->rowindices = 0; 3280 a->rowvalues = 0; 3281 a->getrowactive = PETSC_FALSE; 3282 3283 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3284 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3285 3286 if (oldmat->colmap) { 3287 #if defined (PETSC_USE_CTABLE) 3288 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3289 #else 3290 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3291 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3292 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3293 #endif 3294 } else a->colmap = 0; 3295 if (oldmat->garray) { 3296 PetscInt len; 3297 len = oldmat->B->cmap->n; 3298 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3299 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3300 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3301 } else a->garray = 0; 3302 3303 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3304 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3305 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3306 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3307 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3308 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3309 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3310 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3311 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3312 *newmat = mat; 3313 PetscFunctionReturn(0); 3314 } 3315 3316 3317 3318 #undef __FUNCT__ 3319 #define __FUNCT__ "MatLoad_MPIAIJ" 3320 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3321 { 3322 PetscScalar *vals,*svals; 3323 MPI_Comm comm = ((PetscObject)viewer)->comm; 3324 PetscErrorCode ierr; 3325 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3326 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3327 PetscInt 
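/* header[] is the PETSc binary matrix header read on rank 0: header[0] = MAT_FILE_CLASSID,
   header[1] = global rows, header[2] = global cols, header[3] = total nonzero count (unused here) */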
header[4],*rowlengths = 0,M,N,m,*cols; 3328 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 3329 PetscInt cend,cstart,n,*rowners,sizesset=1; 3330 int fd; 3331 3332 PetscFunctionBegin; 3333 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3334 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3335 if (!rank) { 3336 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3337 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3338 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3339 } 3340 3341 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3342 3343 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3344 M = header[1]; N = header[2]; 3345 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3346 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3347 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3348 3349 /* If global sizes are set, check if they are consistent with that given in the file */ 3350 if (sizesset) { 3351 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3352 } 3353 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3354 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3355 3356 /* determine ownership of all rows */ 3357 if (newMat->rmap->n < 0 ) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */ 3358 else m = newMat->rmap->n; /* Set by user */ 3359 3360 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3361 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3362 3363 /* First process needs enough room for process with most rows */ 3364 if (!rank) { 3365 mmax = rowners[1]; 3366 for (i=2; i<size; i++) { 3367 mmax = PetscMax(mmax,rowners[i]); 3368 } 3369 } else mmax = m; 3370 3371 rowners[0] = 0; 3372 for (i=2; i<=size; i++) { 3373 rowners[i] += rowners[i-1]; 3374 } 3375 rstart = rowners[rank]; 3376 rend = rowners[rank+1]; 3377 3378 /* distribute row lengths to all processors */ 3379 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 3380 if (!rank) { 3381 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3382 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3383 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3384 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3385 for (j=0; j<m; j++) { 3386 procsnz[0] += ourlens[j]; 3387 } 3388 for (i=1; i<size; i++) { 3389 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3390 /* calculate the number of nonzeros on each processor */ 3391 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3392 procsnz[i] += rowlengths[j]; 3393 } 3394 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3395 } 3396 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3397 } else { 3398 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3399 } 3400 3401 if (!rank) { 3402 /* determine max buffer needed and allocate it */ 3403 maxnz = 0; 3404 for (i=0; i<size; i++) { 3405 maxnz = PetscMax(maxnz,procsnz[i]); 3406 } 3407 ierr = 
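/* rank 0 ships the column indices to one other process at a time, so a single staging buffer
   sized for the largest per-process nonzero count suffices */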
PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3408 3409 /* read in my part of the matrix column indices */ 3410 nz = procsnz[0]; 3411 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3412 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3413 3414 /* read in every one elses and ship off */ 3415 for (i=1; i<size; i++) { 3416 nz = procsnz[i]; 3417 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3418 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3419 } 3420 ierr = PetscFree(cols);CHKERRQ(ierr); 3421 } else { 3422 /* determine buffer space needed for message */ 3423 nz = 0; 3424 for (i=0; i<m; i++) { 3425 nz += ourlens[i]; 3426 } 3427 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3428 3429 /* receive message of column indices*/ 3430 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3431 } 3432 3433 /* determine column ownership if matrix is not square */ 3434 if (N != M) { 3435 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3436 else n = newMat->cmap->n; 3437 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3438 cstart = cend - n; 3439 } else { 3440 cstart = rstart; 3441 cend = rend; 3442 n = cend - cstart; 3443 } 3444 3445 /* loop over local rows, determining number of off diagonal entries */ 3446 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3447 jj = 0; 3448 for (i=0; i<m; i++) { 3449 for (j=0; j<ourlens[i]; j++) { 3450 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3451 jj++; 3452 } 3453 } 3454 3455 for (i=0; i<m; i++) { 3456 ourlens[i] -= offlens[i]; 3457 } 3458 if (!sizesset) { 3459 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3460 } 3461 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3462 3463 for (i=0; i<m; i++) { 3464 ourlens[i] += offlens[i]; 3465 } 3466 3467 if (!rank) { 3468 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3469 3470 /* read in my part of the matrix numerical values */ 3471 nz = procsnz[0]; 3472 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3473 3474 /* insert into matrix */ 3475 jj = rstart; 3476 smycols = mycols; 3477 svals = vals; 3478 for (i=0; i<m; i++) { 3479 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3480 smycols += ourlens[i]; 3481 svals += ourlens[i]; 3482 jj++; 3483 } 3484 3485 /* read in other processors and ship out */ 3486 for (i=1; i<size; i++) { 3487 nz = procsnz[i]; 3488 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3489 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3490 } 3491 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3492 } else { 3493 /* receive numeric values */ 3494 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3495 3496 /* receive message of values*/ 3497 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3498 3499 /* insert into matrix */ 3500 jj = rstart; 3501 smycols = mycols; 3502 svals = vals; 3503 for (i=0; i<m; i++) { 3504 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3505 smycols += ourlens[i]; 3506 svals += ourlens[i]; 3507 jj++; 3508 } 3509 } 3510 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3511 ierr = PetscFree(vals);CHKERRQ(ierr); 3512 ierr = PetscFree(mycols);CHKERRQ(ierr); 3513 ierr = PetscFree(rowners);CHKERRQ(ierr); 3514 ierr = 
MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3515 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3516 PetscFunctionReturn(0); 3517 } 3518 3519 #undef __FUNCT__ 3520 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3521 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3522 { 3523 PetscErrorCode ierr; 3524 IS iscol_local; 3525 PetscInt csize; 3526 3527 PetscFunctionBegin; 3528 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3529 if (call == MAT_REUSE_MATRIX) { 3530 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3531 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3532 } else { 3533 PetscInt cbs; 3534 ierr = ISGetBlockSize(iscol,&cbs); CHKERRQ(ierr); 3535 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3536 ierr = ISSetBlockSize(iscol_local,cbs); CHKERRQ(ierr); 3537 } 3538 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3539 if (call == MAT_INITIAL_MATRIX) { 3540 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3541 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3542 } 3543 PetscFunctionReturn(0); 3544 } 3545 3546 #undef __FUNCT__ 3547 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3548 /* 3549 Not great since it makes two copies of the submatrix, first an SeqAIJ 3550 in local and then by concatenating the local matrices the end result. 3551 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3552 3553 Note: This requires a sequential iscol with all indices. 3554 */ 3555 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3556 { 3557 PetscErrorCode ierr; 3558 PetscMPIInt rank,size; 3559 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3560 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3561 Mat *local,M,Mreuse; 3562 MatScalar *vwork,*aa; 3563 MPI_Comm comm = ((PetscObject)mat)->comm; 3564 Mat_SeqAIJ *aij; 3565 3566 3567 PetscFunctionBegin; 3568 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3569 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3570 3571 if (call == MAT_REUSE_MATRIX) { 3572 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3573 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3574 local = &Mreuse; 3575 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3576 } else { 3577 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3578 Mreuse = *local; 3579 ierr = PetscFree(local);CHKERRQ(ierr); 3580 } 3581 3582 /* 3583 m - number of local rows 3584 n - number of columns (same on all processors) 3585 rstart - first row in new global matrix generated 3586 */ 3587 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3588 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3589 if (call == MAT_INITIAL_MATRIX) { 3590 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3591 ii = aij->i; 3592 jj = aij->j; 3593 3594 /* 3595 Determine the number of non-zeros in the diagonal and off-diagonal 3596 portions of the matrix in order to do correct preallocation 3597 */ 3598 3599 /* first get start and end of "diagonal" columns */ 3600 if (csize == PETSC_DECIDE) { 3601 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3602 if (mglobal == n) { 
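/* the requested submatrix is square, so give each process as many columns as it has selected rows;
   the column layout of the result then matches its row layout */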
/* square matrix */ 3603 nlocal = m; 3604 } else { 3605 nlocal = n/size + ((n % size) > rank); 3606 } 3607 } else { 3608 nlocal = csize; 3609 } 3610 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3611 rstart = rend - nlocal; 3612 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3613 3614 /* next, compute all the lengths */ 3615 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3616 olens = dlens + m; 3617 for (i=0; i<m; i++) { 3618 jend = ii[i+1] - ii[i]; 3619 olen = 0; 3620 dlen = 0; 3621 for (j=0; j<jend; j++) { 3622 if (*jj < rstart || *jj >= rend) olen++; 3623 else dlen++; 3624 jj++; 3625 } 3626 olens[i] = olen; 3627 dlens[i] = dlen; 3628 } 3629 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3630 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3631 ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); 3632 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3633 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3634 ierr = PetscFree(dlens);CHKERRQ(ierr); 3635 } else { 3636 PetscInt ml,nl; 3637 3638 M = *newmat; 3639 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3640 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3641 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3642 /* 3643 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3644 rather than the slower MatSetValues(). 3645 */ 3646 M->was_assembled = PETSC_TRUE; 3647 M->assembled = PETSC_FALSE; 3648 } 3649 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3650 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3651 ii = aij->i; 3652 jj = aij->j; 3653 aa = aij->a; 3654 for (i=0; i<m; i++) { 3655 row = rstart + i; 3656 nz = ii[i+1] - ii[i]; 3657 cwork = jj; jj += nz; 3658 vwork = aa; aa += nz; 3659 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3660 } 3661 3662 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3663 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3664 *newmat = M; 3665 3666 /* save submatrix used in processor for next request */ 3667 if (call == MAT_INITIAL_MATRIX) { 3668 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3669 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3670 } 3671 3672 PetscFunctionReturn(0); 3673 } 3674 3675 EXTERN_C_BEGIN 3676 #undef __FUNCT__ 3677 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3678 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3679 { 3680 PetscInt m,cstart, cend,j,nnz,i,d; 3681 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3682 const PetscInt *JJ; 3683 PetscScalar *values; 3684 PetscErrorCode ierr; 3685 3686 PetscFunctionBegin; 3687 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3688 3689 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3690 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3691 m = B->rmap->n; 3692 cstart = B->cmap->rstart; 3693 cend = B->cmap->rend; 3694 rstart = B->rmap->rstart; 3695 3696 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 3697 3698 #if defined(PETSC_USE_DEBUGGING) 3699 for (i=0; i<m; i++) { 3700 nnz = Ii[i+1]- Ii[i]; 3701 JJ = J + Ii[i]; 3702 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of 
columns",i,nnz); 3703 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i); 3704 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3705 } 3706 #endif 3707 3708 for (i=0; i<m; i++) { 3709 nnz = Ii[i+1]- Ii[i]; 3710 JJ = J + Ii[i]; 3711 nnz_max = PetscMax(nnz_max,nnz); 3712 d = 0; 3713 for (j=0; j<nnz; j++) { 3714 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3715 } 3716 d_nnz[i] = d; 3717 o_nnz[i] = nnz - d; 3718 } 3719 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3720 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3721 3722 if (v) values = (PetscScalar*)v; 3723 else { 3724 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3725 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3726 } 3727 3728 for (i=0; i<m; i++) { 3729 ii = i + rstart; 3730 nnz = Ii[i+1]- Ii[i]; 3731 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3732 } 3733 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3734 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3735 3736 if (!v) { 3737 ierr = PetscFree(values);CHKERRQ(ierr); 3738 } 3739 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3740 PetscFunctionReturn(0); 3741 } 3742 EXTERN_C_END 3743 3744 #undef __FUNCT__ 3745 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3746 /*@ 3747 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3748 (the default parallel PETSc format). 3749 3750 Collective on MPI_Comm 3751 3752 Input Parameters: 3753 + B - the matrix 3754 . i - the indices into j for the start of each local row (starts with zero) 3755 . j - the column indices for each local row (starts with zero) 3756 - v - optional values in the matrix 3757 3758 Level: developer 3759 3760 Notes: 3761 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3762 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3763 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3764 3765 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3766 3767 The format which is used for the sparse matrix input is equivalent to a 3768 row-major ordering,
i.e for the following matrix, the input data expected is 3769 as shown: 3770 3771 1 0 0 3772 2 0 3 P0 3773 ------- 3774 4 5 6 P1 3775 3776 Process0 [P0]: rows_owned=[0,1] 3777 i = {0,1,3} [size = nrow+1 = 2+1] 3778 j = {0,0,2} [size = nz = 6] 3779 v = {1,2,3} [size = nz = 6] 3780 3781 Process1 [P1]: rows_owned=[2] 3782 i = {0,3} [size = nrow+1 = 1+1] 3783 j = {0,1,2} [size = nz = 6] 3784 v = {4,5,6} [size = nz = 6] 3785 3786 .keywords: matrix, aij, compressed row, sparse, parallel 3787 3788 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3789 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3790 @*/ 3791 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3792 { 3793 PetscErrorCode ierr; 3794 3795 PetscFunctionBegin; 3796 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3797 PetscFunctionReturn(0); 3798 } 3799 3800 #undef __FUNCT__ 3801 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3802 /*@C 3803 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3804 (the default parallel PETSc format). For good matrix assembly performance 3805 the user should preallocate the matrix storage by setting the parameters 3806 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3807 performance can be increased by more than a factor of 50. 3808 3809 Collective on MPI_Comm 3810 3811 Input Parameters: 3812 + A - the matrix 3813 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3814 (same value is used for all local rows) 3815 . d_nnz - array containing the number of nonzeros in the various rows of the 3816 DIAGONAL portion of the local submatrix (possibly different for each row) 3817 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3818 The size of this array is equal to the number of local rows, i.e 'm'. 3819 For matrices that will be factored, you must leave room for (and set) 3820 the diagonal entry even if it is zero. 3821 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3822 submatrix (same value is used for all local rows). 3823 - o_nnz - array containing the number of nonzeros in the various rows of the 3824 OFF-DIAGONAL portion of the local submatrix (possibly different for 3825 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3826 structure. The size of this array is equal to the number 3827 of local rows, i.e 'm'. 3828 3829 If the *_nnz parameter is given then the *_nz parameter is ignored 3830 3831 The AIJ format (also called the Yale sparse matrix format or 3832 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3833 storage. The stored row and column indices begin with zero. 3834 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3835 3836 The parallel matrix is partitioned such that the first m0 rows belong to 3837 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3838 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 
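   A typical call sequence, shown only as a minimal sketch (the local sizes m and n and the
   per-row counts d_nnz[] and o_nnz[] are assumed to have been computed already), is
.vb
   MatCreate(PETSC_COMM_WORLD,&A);
   MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
   MatSetType(A,MATMPIAIJ);
   MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve
   followed by the usual MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() calls.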
3839 3840 The DIAGONAL portion of the local submatrix of a processor can be defined 3841 as the submatrix which is obtained by extraction the part corresponding to 3842 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3843 first row that belongs to the processor, r2 is the last row belonging to 3844 the this processor, and c1-c2 is range of indices of the local part of a 3845 vector suitable for applying the matrix to. This is an mxn matrix. In the 3846 common case of a square matrix, the row and column ranges are the same and 3847 the DIAGONAL part is also square. The remaining portion of the local 3848 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3849 3850 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3851 3852 You can call MatGetInfo() to get information on how effective the preallocation was; 3853 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3854 You can also run with the option -info and look for messages with the string 3855 malloc in them to see if additional memory allocation was needed. 3856 3857 Example usage: 3858 3859 Consider the following 8x8 matrix with 34 non-zero values, that is 3860 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3861 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3862 as follows: 3863 3864 .vb 3865 1 2 0 | 0 3 0 | 0 4 3866 Proc0 0 5 6 | 7 0 0 | 8 0 3867 9 0 10 | 11 0 0 | 12 0 3868 ------------------------------------- 3869 13 0 14 | 15 16 17 | 0 0 3870 Proc1 0 18 0 | 19 20 21 | 0 0 3871 0 0 0 | 22 23 0 | 24 0 3872 ------------------------------------- 3873 Proc2 25 26 27 | 0 0 28 | 29 0 3874 30 0 0 | 31 32 33 | 0 34 3875 .ve 3876 3877 This can be represented as a collection of submatrices as: 3878 3879 .vb 3880 A B C 3881 D E F 3882 G H I 3883 .ve 3884 3885 Where the submatrices A,B,C are owned by proc0, D,E,F are 3886 owned by proc1, G,H,I are owned by proc2. 3887 3888 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3889 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3890 The 'M','N' parameters are 8,8, and have the same values on all procs. 3891 3892 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3893 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3894 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3895 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3896 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3897 matrix, ans [DF] as another SeqAIJ matrix. 3898 3899 When d_nz, o_nz parameters are specified, d_nz storage elements are 3900 allocated for every row of the local diagonal submatrix, and o_nz 3901 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3902 One way to choose d_nz and o_nz is to use the max nonzerors per local 3903 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3904 In this case, the values of d_nz,o_nz are: 3905 .vb 3906 proc0 : dnz = 2, o_nz = 2 3907 proc1 : dnz = 3, o_nz = 2 3908 proc2 : dnz = 1, o_nz = 4 3909 .ve 3910 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3911 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3912 for proc3. i.e we are using 12+15+10=37 storage locations to store 3913 34 values. 3914 3915 When d_nnz, o_nnz parameters are specified, the storage is specified 3916 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 
3917 In the above case the values for d_nnz,o_nnz are: 3918 .vb 3919 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3920 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3921 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3922 .ve 3923 Here the space allocated is sum of all the above values i.e 34, and 3924 hence pre-allocation is perfect. 3925 3926 Level: intermediate 3927 3928 .keywords: matrix, aij, compressed row, sparse, parallel 3929 3930 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 3931 MPIAIJ, MatGetInfo() 3932 @*/ 3933 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3934 { 3935 PetscErrorCode ierr; 3936 3937 PetscFunctionBegin; 3938 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3939 PetscValidType(B,1); 3940 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3941 PetscFunctionReturn(0); 3942 } 3943 3944 #undef __FUNCT__ 3945 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3946 /*@ 3947 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3948 CSR format the local rows. 3949 3950 Collective on MPI_Comm 3951 3952 Input Parameters: 3953 + comm - MPI communicator 3954 . m - number of local rows (Cannot be PETSC_DECIDE) 3955 . n - This value should be the same as the local size used in creating the 3956 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3957 calculated if N is given) For square matrices n is almost always m. 3958 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3959 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3960 . i - row indices 3961 . j - column indices 3962 - a - matrix values 3963 3964 Output Parameter: 3965 . mat - the matrix 3966 3967 Level: intermediate 3968 3969 Notes: 3970 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3971 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3972 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3973 3974 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3975 3976 The format which is used for the sparse matrix input, is equivalent to a 3977 row-major ordering.. 
i.e for the following matrix, the input data expected is 3978 as shown: 3979 3980 1 0 0 3981 2 0 3 P0 3982 ------- 3983 4 5 6 P1 3984 3985 Process0 [P0]: rows_owned=[0,1] 3986 i = {0,1,3} [size = nrow+1 = 2+1] 3987 j = {0,0,2} [size = nz = 6] 3988 v = {1,2,3} [size = nz = 6] 3989 3990 Process1 [P1]: rows_owned=[2] 3991 i = {0,3} [size = nrow+1 = 1+1] 3992 j = {0,1,2} [size = nz = 6] 3993 v = {4,5,6} [size = nz = 6] 3994 3995 .keywords: matrix, aij, compressed row, sparse, parallel 3996 3997 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3998 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 3999 @*/ 4000 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4001 { 4002 PetscErrorCode ierr; 4003 4004 PetscFunctionBegin; 4005 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4006 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4007 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4008 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4009 /* ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); */ 4010 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4011 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4012 PetscFunctionReturn(0); 4013 } 4014 4015 #undef __FUNCT__ 4016 #define __FUNCT__ "MatCreateAIJ" 4017 /*@C 4018 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4019 (the default parallel PETSc format). For good matrix assembly performance 4020 the user should preallocate the matrix storage by setting the parameters 4021 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4022 performance can be increased by more than a factor of 50. 4023 4024 Collective on MPI_Comm 4025 4026 Input Parameters: 4027 + comm - MPI communicator 4028 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4029 This value should be the same as the local size used in creating the 4030 y vector for the matrix-vector product y = Ax. 4031 . n - This value should be the same as the local size used in creating the 4032 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4033 calculated if N is given) For square matrices n is almost always m. 4034 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4035 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4036 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4037 (same value is used for all local rows) 4038 . d_nnz - array containing the number of nonzeros in the various rows of the 4039 DIAGONAL portion of the local submatrix (possibly different for each row) 4040 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 4041 The size of this array is equal to the number of local rows, i.e 'm'. 4042 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4043 submatrix (same value is used for all local rows). 4044 - o_nnz - array containing the number of nonzeros in the various rows of the 4045 OFF-DIAGONAL portion of the local submatrix (possibly different for 4046 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 4047 structure. The size of this array is equal to the number 4048 of local rows, i.e 'm'. 4049 4050 Output Parameter: 4051 . 
A - the matrix 4052 4053 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4054 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4055 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4056 4057 Notes: 4058 If the *_nnz parameter is given then the *_nz parameter is ignored 4059 4060 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4061 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4062 storage requirements for this matrix. 4063 4064 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4065 processor than it must be used on all processors that share the object for 4066 that argument. 4067 4068 The user MUST specify either the local or global matrix dimensions 4069 (possibly both). 4070 4071 The parallel matrix is partitioned across processors such that the 4072 first m0 rows belong to process 0, the next m1 rows belong to 4073 process 1, the next m2 rows belong to process 2 etc.. where 4074 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4075 values corresponding to [m x N] submatrix. 4076 4077 The columns are logically partitioned with the n0 columns belonging 4078 to 0th partition, the next n1 columns belonging to the next 4079 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 4080 4081 The DIAGONAL portion of the local submatrix on any given processor 4082 is the submatrix corresponding to the rows and columns m,n 4083 corresponding to the given processor. i.e diagonal matrix on 4084 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4085 etc. The remaining portion of the local submatrix [m x (N-n)] 4086 constitute the OFF-DIAGONAL portion. The example below better 4087 illustrates this concept. 4088 4089 For a square global matrix we define each processor's diagonal portion 4090 to be its local rows and the corresponding columns (a square submatrix); 4091 each processor's off-diagonal portion encompasses the remainder of the 4092 local matrix (a rectangular submatrix). 4093 4094 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4095 4096 When calling this routine with a single process communicator, a matrix of 4097 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4098 type of communicator, use the construction mechanism: 4099 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4100 4101 By default, this format uses inodes (identical nodes) when possible. 4102 We search for consecutive rows with the same nonzero structure, thereby 4103 reusing matrix information to achieve increased efficiency. 4104 4105 Options Database Keys: 4106 + -mat_no_inode - Do not use inodes 4107 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4108 - -mat_aij_oneindex - Internally use indexing starting at 1 4109 rather than 0. Note that when calling MatSetValues(), 4110 the user still MUST index entries starting at 0! 4111 4112 4113 Example usage: 4114 4115 Consider the following 8x8 matrix with 34 non-zero values, that is 4116 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4117 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown 4118 as follows: 4119 4120 .vb 4121 1 2 0 | 0 3 0 | 0 4 4122 Proc0 0 5 6 | 7 0 0 | 8 0 4123 9 0 10 | 11 0 0 | 12 0 4124 ------------------------------------- 4125 13 0 14 | 15 16 17 | 0 0 4126 Proc1 0 18 0 | 19 20 21 | 0 0 4127 0 0 0 | 22 23 0 | 24 0 4128 ------------------------------------- 4129 Proc2 25 26 27 | 0 0 28 | 29 0 4130 30 0 0 | 31 32 33 | 0 34 4131 .ve 4132 4133 This can be represented as a collection of submatrices as: 4134 4135 .vb 4136 A B C 4137 D E F 4138 G H I 4139 .ve 4140 4141 Where the submatrices A,B,C are owned by proc0, D,E,F are 4142 owned by proc1, G,H,I are owned by proc2. 4143 4144 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4145 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4146 The 'M','N' parameters are 8,8, and have the same values on all procs. 4147 4148 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4149 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4150 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4151 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4152 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4153 matrix, ans [DF] as another SeqAIJ matrix. 4154 4155 When d_nz, o_nz parameters are specified, d_nz storage elements are 4156 allocated for every row of the local diagonal submatrix, and o_nz 4157 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4158 One way to choose d_nz and o_nz is to use the max nonzerors per local 4159 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4160 In this case, the values of d_nz,o_nz are: 4161 .vb 4162 proc0 : dnz = 2, o_nz = 2 4163 proc1 : dnz = 3, o_nz = 2 4164 proc2 : dnz = 1, o_nz = 4 4165 .ve 4166 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4167 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4168 for proc3. i.e we are using 12+15+10=37 storage locations to store 4169 34 values. 4170 4171 When d_nnz, o_nnz parameters are specified, the storage is specified 4172 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4173 In the above case the values for d_nnz,o_nnz are: 4174 .vb 4175 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4176 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4177 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4178 .ve 4179 Here the space allocated is sum of all the above values i.e 34, and 4180 hence pre-allocation is perfect. 
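   For the layout above each process would call, as a sketch only (every process passes its own
   local sizes m,n and the d_nnz/o_nnz arrays listed for it):
.vb
   MatCreateAIJ(PETSC_COMM_WORLD,m,n,8,8,0,d_nnz,0,o_nnz,&A);
.ve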
4181 4182 Level: intermediate 4183 4184 .keywords: matrix, aij, compressed row, sparse, parallel 4185 4186 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4187 MPIAIJ, MatCreateMPIAIJWithArrays() 4188 @*/ 4189 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4190 { 4191 PetscErrorCode ierr; 4192 PetscMPIInt size; 4193 4194 PetscFunctionBegin; 4195 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4196 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4197 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4198 if (size > 1) { 4199 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4200 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4201 } else { 4202 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4203 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4204 } 4205 PetscFunctionReturn(0); 4206 } 4207 4208 #undef __FUNCT__ 4209 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4210 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 4211 { 4212 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4213 4214 PetscFunctionBegin; 4215 *Ad = a->A; 4216 *Ao = a->B; 4217 *colmap = a->garray; 4218 PetscFunctionReturn(0); 4219 } 4220 4221 #undef __FUNCT__ 4222 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4223 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4224 { 4225 PetscErrorCode ierr; 4226 PetscInt i; 4227 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4228 4229 PetscFunctionBegin; 4230 if (coloring->ctype == IS_COLORING_GLOBAL) { 4231 ISColoringValue *allcolors,*colors; 4232 ISColoring ocoloring; 4233 4234 /* set coloring for diagonal portion */ 4235 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4236 4237 /* set coloring for off-diagonal portion */ 4238 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 4239 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4240 for (i=0; i<a->B->cmap->n; i++) { 4241 colors[i] = allcolors[a->garray[i]]; 4242 } 4243 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4244 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4245 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4246 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4247 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4248 ISColoringValue *colors; 4249 PetscInt *larray; 4250 ISColoring ocoloring; 4251 4252 /* set coloring for diagonal portion */ 4253 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4254 for (i=0; i<a->A->cmap->n; i++) { 4255 larray[i] = i + A->cmap->rstart; 4256 } 4257 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 4258 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4259 for (i=0; i<a->A->cmap->n; i++) { 4260 colors[i] = coloring->colors[larray[i]]; 4261 } 4262 ierr = PetscFree(larray);CHKERRQ(ierr); 4263 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4264 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4265 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4266 4267 /* set coloring for off-diagonal portion */ 4268 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4269 ierr = 
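/* a->garray[i] is the global column number of local column i of the off-diagonal block B; translate
   these global numbers into the ghosted local numbering so the ghosted colors can be looked up below */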
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 4270 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4271 for (i=0; i<a->B->cmap->n; i++) { 4272 colors[i] = coloring->colors[larray[i]]; 4273 } 4274 ierr = PetscFree(larray);CHKERRQ(ierr); 4275 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4276 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4277 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4278 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4279 4280 PetscFunctionReturn(0); 4281 } 4282 4283 #if defined(PETSC_HAVE_ADIC) 4284 #undef __FUNCT__ 4285 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 4286 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 4287 { 4288 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4289 PetscErrorCode ierr; 4290 4291 PetscFunctionBegin; 4292 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 4293 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 4294 PetscFunctionReturn(0); 4295 } 4296 #endif 4297 4298 #undef __FUNCT__ 4299 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4300 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4301 { 4302 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4303 PetscErrorCode ierr; 4304 4305 PetscFunctionBegin; 4306 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4307 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4308 PetscFunctionReturn(0); 4309 } 4310 4311 #undef __FUNCT__ 4312 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4313 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4314 { 4315 PetscErrorCode ierr; 4316 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4317 PetscInt *indx; 4318 4319 PetscFunctionBegin; 4320 /* This routine will ONLY return MPIAIJ type matrix */ 4321 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4322 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4323 if (n == PETSC_DECIDE){ 4324 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4325 } 4326 /* Check sum(n) = N */ 4327 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4328 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4329 4330 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4331 rstart -= m; 4332 4333 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4334 for (i=0;i<m;i++) { 4335 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4336 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4337 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4338 } 4339 4340 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4341 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE); CHKERRQ(ierr); 4342 ierr = MatSetBlockSizes(*outmat,bs,cbs); CHKERRQ(ierr); 4343 ierr = MatSetType(*outmat,MATMPIAIJ); CHKERRQ(ierr); 4344 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4345 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4346 PetscFunctionReturn(0); 4347 } 4348 4349 #undef __FUNCT__ 4350 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4351 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4352 { 4353 PetscErrorCode ierr; 4354 PetscInt 
m,N,i,rstart,nnz,Ii; 4355 PetscInt *indx; 4356 PetscScalar *values; 4357 4358 PetscFunctionBegin; 4359 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4360 ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 4361 for (i=0;i<m;i++) { 4362 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4363 Ii = i + rstart; 4364 ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4365 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4366 } 4367 ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4368 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4369 PetscFunctionReturn(0); 4370 } 4371 4372 #undef __FUNCT__ 4373 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4374 /*@ 4375 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4376 matrices from each processor 4377 4378 Collective on MPI_Comm 4379 4380 Input Parameters: 4381 + comm - the communicators the parallel matrix will live on 4382 . inmat - the input sequential matrices 4383 . n - number of local columns (or PETSC_DECIDE) 4384 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4385 4386 Output Parameter: 4387 . outmat - the parallel matrix generated 4388 4389 Level: advanced 4390 4391 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4392 4393 @*/ 4394 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4395 { 4396 PetscErrorCode ierr; 4397 4398 PetscFunctionBegin; 4399 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4400 if (scall == MAT_INITIAL_MATRIX){ 4401 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4402 } 4403 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4404 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4405 PetscFunctionReturn(0); 4406 } 4407 4408 #undef __FUNCT__ 4409 #define __FUNCT__ "MatFileSplit" 4410 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4411 { 4412 PetscErrorCode ierr; 4413 PetscMPIInt rank; 4414 PetscInt m,N,i,rstart,nnz; 4415 size_t len; 4416 const PetscInt *indx; 4417 PetscViewer out; 4418 char *name; 4419 Mat B; 4420 const PetscScalar *values; 4421 4422 PetscFunctionBegin; 4423 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4424 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4425 /* Should this be the type of the diagonal block of A? 
*/ 4426 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4427 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4428 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 4429 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4430 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4431 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4432 for (i=0;i<m;i++) { 4433 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4434 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4435 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4436 } 4437 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4438 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4439 4440 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4441 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4442 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4443 sprintf(name,"%s.%d",outfile,rank); 4444 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4445 ierr = PetscFree(name); 4446 ierr = MatView(B,out);CHKERRQ(ierr); 4447 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4448 ierr = MatDestroy(&B);CHKERRQ(ierr); 4449 PetscFunctionReturn(0); 4450 } 4451 4452 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4453 #undef __FUNCT__ 4454 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4455 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4456 { 4457 PetscErrorCode ierr; 4458 Mat_Merge_SeqsToMPI *merge; 4459 PetscContainer container; 4460 4461 PetscFunctionBegin; 4462 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4463 if (container) { 4464 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4465 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4466 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4467 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4468 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4469 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4470 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4471 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4472 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4473 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4474 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4475 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4476 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4477 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4478 ierr = PetscFree(merge);CHKERRQ(ierr); 4479 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4480 } 4481 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4482 PetscFunctionReturn(0); 4483 } 4484 4485 #include <../src/mat/utils/freespace.h> 4486 #include <petscbt.h> 4487 4488 #undef __FUNCT__ 4489 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4490 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4491 { 4492 PetscErrorCode ierr; 4493 MPI_Comm comm=((PetscObject)mpimat)->comm; 4494 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4495 PetscMPIInt size,rank,taga,*len_s; 4496 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4497 PetscInt proc,m; 4498 PetscInt **buf_ri,**buf_rj; 4499 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4500 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4501 MPI_Request *s_waits,*r_waits; 4502 MPI_Status *status; 4503 MatScalar *aa=a->a; 4504 MatScalar **abuf_r,*ba_i; 4505 Mat_Merge_SeqsToMPI *merge; 4506 PetscContainer container; 4507 4508 PetscFunctionBegin; 4509 ierr = 
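/*
   Numeric phase of merging the per-process SeqAIJ matrices into one MPIAIJ matrix: the row
   ownership, the merged ij structure (bi,bj) and the message pattern were built by
   MatCreateMPIAIJSumSeqAIJSymbolic() below and stashed in the "MatMergeSeqsToMPI" container;
   this routine only communicates and accumulates the numerical values.  Callers normally go
   through the public driver, roughly as follows (a sketch; the argument list is assumed from
   the symbolic/numeric pair here):

      ierr = MatCreateMPIAIJSumSeqAIJ(comm,seqmat,m,n,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
*/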
PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4510 4511 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4512 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4513 4514 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4515 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4516 4517 bi = merge->bi; 4518 bj = merge->bj; 4519 buf_ri = merge->buf_ri; 4520 buf_rj = merge->buf_rj; 4521 4522 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4523 owners = merge->rowmap->range; 4524 len_s = merge->len_s; 4525 4526 /* send and recv matrix values */ 4527 /*-----------------------------*/ 4528 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4529 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4530 4531 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4532 for (proc=0,k=0; proc<size; proc++){ 4533 if (!len_s[proc]) continue; 4534 i = owners[proc]; 4535 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4536 k++; 4537 } 4538 4539 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4540 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4541 ierr = PetscFree(status);CHKERRQ(ierr); 4542 4543 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4544 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4545 4546 /* insert mat values of mpimat */ 4547 /*----------------------------*/ 4548 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4549 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4550 4551 for (k=0; k<merge->nrecv; k++){ 4552 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4553 nrows = *(buf_ri_k[k]); 4554 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4555 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4556 } 4557 4558 /* set values of ba */ 4559 m = merge->rowmap->n; 4560 for (i=0; i<m; i++) { 4561 arow = owners[rank] + i; 4562 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4563 bnzi = bi[i+1] - bi[i]; 4564 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4565 4566 /* add local non-zero vals of this proc's seqmat into ba */ 4567 anzi = ai[arow+1] - ai[arow]; 4568 aj = a->j + ai[arow]; 4569 aa = a->a + ai[arow]; 4570 nextaj = 0; 4571 for (j=0; nextaj<anzi; j++){ 4572 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4573 ba_i[j] += aa[nextaj++]; 4574 } 4575 } 4576 4577 /* add received vals into ba */ 4578 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4579 /* i-th row */ 4580 if (i == *nextrow[k]) { 4581 anzi = *(nextai[k]+1) - *nextai[k]; 4582 aj = buf_rj[k] + *(nextai[k]); 4583 aa = abuf_r[k] + *(nextai[k]); 4584 nextaj = 0; 4585 for (j=0; nextaj<anzi; j++){ 4586 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4587 ba_i[j] += aa[nextaj++]; 4588 } 4589 } 4590 nextrow[k]++; nextai[k]++; 4591 } 4592 } 4593 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4594 } 4595 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4596 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4597 4598 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4599 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4600 ierr = 
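/*
   Worked example of the accumulation loops above (illustrative numbers only): if the merged
   row has column indices bj_i = {0,3,5,7} and this process contributes aj = {3,7} with values
   aa = {2.0,4.0}, then scanning j over bj_i while advancing nextaj only on a match yields
   ba_i = {0,2.0,0,4.0} before the received contributions are added in the same fashion.
   This works because both index lists are sorted and, by construction of the symbolic phase,
   every contributed column already appears in the merged row.
*/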
PetscFree(ba_i);CHKERRQ(ierr); 4601 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4602 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4603 PetscFunctionReturn(0); 4604 } 4605 4606 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4607 4608 #undef __FUNCT__ 4609 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4610 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4611 { 4612 PetscErrorCode ierr; 4613 Mat B_mpi; 4614 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4615 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4616 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4617 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4618 PetscInt len,proc,*dnz,*onz,bs,cbs; 4619 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4620 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4621 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4622 MPI_Status *status; 4623 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4624 PetscBT lnkbt; 4625 Mat_Merge_SeqsToMPI *merge; 4626 PetscContainer container; 4627 4628 PetscFunctionBegin; 4629 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4630 4631 /* make sure it is a PETSc comm */ 4632 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4633 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4634 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4635 4636 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4637 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4638 4639 /* determine row ownership */ 4640 /*---------------------------------------------------------*/ 4641 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4642 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4643 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4644 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4645 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4646 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4647 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4648 4649 m = merge->rowmap->n; 4650 M = merge->rowmap->N; 4651 owners = merge->rowmap->range; 4652 4653 /* determine the number of messages to send, their lengths */ 4654 /*---------------------------------------------------------*/ 4655 len_s = merge->len_s; 4656 4657 len = 0; /* length of buf_si[] */ 4658 merge->nsend = 0; 4659 for (proc=0; proc<size; proc++){ 4660 len_si[proc] = 0; 4661 if (proc == rank){ 4662 len_s[proc] = 0; 4663 } else { 4664 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4665 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4666 } 4667 if (len_s[proc]) { 4668 merge->nsend++; 4669 nrows = 0; 4670 for (i=owners[proc]; i<owners[proc+1]; i++){ 4671 if (ai[i+1] > ai[i]) nrows++; 4672 } 4673 len_si[proc] = 2*(nrows+1); 4674 len += len_si[proc]; 4675 } 4676 } 4677 4678 /* determine the number and length of messages to receive for ij-structure */ 4679 /*-------------------------------------------------------------------------*/ 4680 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4681 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4682 4683 /* post the Irecv of j-structure */ 4684 /*-------------------------------*/ 4685 ierr = 
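/*
   Example of the message sizing computed above (illustrative numbers only): if
   owners[proc] = 10, owners[proc+1] = 14 and ai[10..14] = {40,40,46,50,57}, then
   len_s[proc] = 57 - 40 = 17 matrix entries (j and a data) are destined for [proc],
   rows 11, 12 and 13 are the three nonempty rows, and len_si[proc] = 2*(3+1) = 8
   integers are needed to describe the corresponding i-structure message.
*/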
PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4686 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4687 4688 /* post the Isend of j-structure */ 4689 /*--------------------------------*/ 4690 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4691 4692 for (proc=0, k=0; proc<size; proc++){ 4693 if (!len_s[proc]) continue; 4694 i = owners[proc]; 4695 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4696 k++; 4697 } 4698 4699 /* receives and sends of j-structure are complete */ 4700 /*------------------------------------------------*/ 4701 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4702 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4703 4704 /* send and recv i-structure */ 4705 /*---------------------------*/ 4706 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4707 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4708 4709 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4710 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4711 for (proc=0,k=0; proc<size; proc++){ 4712 if (!len_s[proc]) continue; 4713 /* form outgoing message for i-structure: 4714 buf_si[0]: nrows to be sent 4715 [1:nrows]: row index (global) 4716 [nrows+1:2*nrows+1]: i-structure index 4717 */ 4718 /*-------------------------------------------*/ 4719 nrows = len_si[proc]/2 - 1; 4720 buf_si_i = buf_si + nrows+1; 4721 buf_si[0] = nrows; 4722 buf_si_i[0] = 0; 4723 nrows = 0; 4724 for (i=owners[proc]; i<owners[proc+1]; i++){ 4725 anzi = ai[i+1] - ai[i]; 4726 if (anzi) { 4727 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4728 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4729 nrows++; 4730 } 4731 } 4732 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4733 k++; 4734 buf_si += len_si[proc]; 4735 } 4736 4737 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4738 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4739 4740 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4741 for (i=0; i<merge->nrecv; i++){ 4742 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4743 } 4744 4745 ierr = PetscFree(len_si);CHKERRQ(ierr); 4746 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4747 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4748 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4749 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4750 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4751 ierr = PetscFree(status);CHKERRQ(ierr); 4752 4753 /* compute a local seq matrix in each processor */ 4754 /*----------------------------------------------*/ 4755 /* allocate bi array and free space for accumulating nonzero column info */ 4756 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4757 bi[0] = 0; 4758 4759 /* create and initialize a linked list */ 4760 nlnk = N+1; 4761 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4762 4763 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4764 len = 0; 4765 len = ai[owners[rank+1]] - ai[owners[rank]]; 4766 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4767 current_space = free_space; 4768 4769 /* determine 
symbolic info for each local row */ 4770 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4771 4772 for (k=0; k<merge->nrecv; k++){ 4773 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4774 nrows = *buf_ri_k[k]; 4775 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4776 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4777 } 4778 4779 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4780 len = 0; 4781 for (i=0;i<m;i++) { 4782 bnzi = 0; 4783 /* add local non-zero cols of this proc's seqmat into lnk */ 4784 arow = owners[rank] + i; 4785 anzi = ai[arow+1] - ai[arow]; 4786 aj = a->j + ai[arow]; 4787 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4788 bnzi += nlnk; 4789 /* add received col data into lnk */ 4790 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4791 if (i == *nextrow[k]) { /* i-th row */ 4792 anzi = *(nextai[k]+1) - *nextai[k]; 4793 aj = buf_rj[k] + *nextai[k]; 4794 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4795 bnzi += nlnk; 4796 nextrow[k]++; nextai[k]++; 4797 } 4798 } 4799 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4800 4801 /* if free space is not available, make more free space */ 4802 if (current_space->local_remaining<bnzi) { 4803 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4804 nspacedouble++; 4805 } 4806 /* copy data into free space, then initialize lnk */ 4807 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4808 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4809 4810 current_space->array += bnzi; 4811 current_space->local_used += bnzi; 4812 current_space->local_remaining -= bnzi; 4813 4814 bi[i+1] = bi[i] + bnzi; 4815 } 4816 4817 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4818 4819 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4820 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4821 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4822 4823 /* create symbolic parallel matrix B_mpi */ 4824 /*---------------------------------------*/ 4825 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4826 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4827 if (n==PETSC_DECIDE) { 4828 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4829 } else { 4830 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4831 } 4832 ierr = MatSetBlockSizes(B_mpi,bs,cbs); CHKERRQ(ierr); 4833 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4834 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4835 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4836 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4837 4838 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4839 B_mpi->assembled = PETSC_FALSE; 4840 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4841 merge->bi = bi; 4842 merge->bj = bj; 4843 merge->buf_ri = buf_ri; 4844 merge->buf_rj = buf_rj; 4845 merge->coi = PETSC_NULL; 4846 merge->coj = PETSC_NULL; 4847 merge->owners_co = PETSC_NULL; 4848 4849 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4850 4851 /* attach the supporting struct to B_mpi for reuse */ 4852 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4853 ierr = 
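/*
   The merge data attached here is recovered later, e.g. by MatCreateMPIAIJSumSeqAIJNumeric()
   and MatDestroy_MPIAIJ_SeqsToMPI(), with the standard container idiom.  A minimal sketch
   (error handling as elsewhere in this file):

       PetscContainer      cont;
       Mat_Merge_SeqsToMPI *m;
       ierr = PetscObjectQuery((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject *)&cont);CHKERRQ(ierr);
       if (cont) { ierr = PetscContainerGetPointer(cont,(void **)&m);CHKERRQ(ierr); }
*/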
PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4854 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4855 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4856 *mpimat = B_mpi; 4857 4858 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4859 PetscFunctionReturn(0); 4860 } 4861 4862 #undef __FUNCT__ 4863 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4864 /*@C 4865 MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential 4866 matrices from each processor 4867 4868 Collective on MPI_Comm 4869 4870 Input Parameters: 4871 + comm - the communicators the parallel matrix will live on 4872 . seqmat - the input sequential matrices 4873 . m - number of local rows (or PETSC_DECIDE) 4874 . n - number of local columns (or PETSC_DECIDE) 4875 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4876 4877 Output Parameter: 4878 . mpimat - the parallel matrix generated 4879 4880 Level: advanced 4881 4882 Notes: 4883 The dimensions of the sequential matrix in each processor MUST be the same. 4884 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4885 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4886 @*/ 4887 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4888 { 4889 PetscErrorCode ierr; 4890 PetscMPIInt size; 4891 4892 PetscFunctionBegin; 4893 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4894 if (size == 1){ 4895 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4896 if (scall == MAT_INITIAL_MATRIX){ 4897 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4898 } else { 4899 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4900 } 4901 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4902 PetscFunctionReturn(0); 4903 } 4904 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4905 if (scall == MAT_INITIAL_MATRIX){ 4906 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4907 } 4908 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4909 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4910 PetscFunctionReturn(0); 4911 } 4912 4913 #undef __FUNCT__ 4914 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4915 /*@ 4916 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with 4917 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4918 with MatGetSize() 4919 4920 Not Collective 4921 4922 Input Parameters: 4923 + A - the matrix 4924 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4925 4926 Output Parameter: 4927 . 
A_loc - the local sequential matrix generated 4928 4929 Level: developer 4930 4931 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4932 4933 @*/ 4934 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4935 { 4936 PetscErrorCode ierr; 4937 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4938 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4939 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4940 MatScalar *aa=a->a,*ba=b->a,*cam; 4941 PetscScalar *ca; 4942 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4943 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4944 PetscBool match; 4945 4946 PetscFunctionBegin; 4947 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4948 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4949 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4950 if (scall == MAT_INITIAL_MATRIX){ 4951 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4952 ci[0] = 0; 4953 for (i=0; i<am; i++){ 4954 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4955 } 4956 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4957 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4958 k = 0; 4959 for (i=0; i<am; i++) { 4960 ncols_o = bi[i+1] - bi[i]; 4961 ncols_d = ai[i+1] - ai[i]; 4962 /* off-diagonal portion of A */ 4963 for (jo=0; jo<ncols_o; jo++) { 4964 col = cmap[*bj]; 4965 if (col >= cstart) break; 4966 cj[k] = col; bj++; 4967 ca[k++] = *ba++; 4968 } 4969 /* diagonal portion of A */ 4970 for (j=0; j<ncols_d; j++) { 4971 cj[k] = cstart + *aj++; 4972 ca[k++] = *aa++; 4973 } 4974 /* off-diagonal portion of A */ 4975 for (j=jo; j<ncols_o; j++) { 4976 cj[k] = cmap[*bj++]; 4977 ca[k++] = *ba++; 4978 } 4979 } 4980 /* put together the new matrix */ 4981 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4982 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4983 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4984 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4985 mat->free_a = PETSC_TRUE; 4986 mat->free_ij = PETSC_TRUE; 4987 mat->nonew = 0; 4988 } else if (scall == MAT_REUSE_MATRIX){ 4989 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4990 ci = mat->i; cj = mat->j; cam = mat->a; 4991 for (i=0; i<am; i++) { 4992 /* off-diagonal portion of A */ 4993 ncols_o = bi[i+1] - bi[i]; 4994 for (jo=0; jo<ncols_o; jo++) { 4995 col = cmap[*bj]; 4996 if (col >= cstart) break; 4997 *cam++ = *ba++; bj++; 4998 } 4999 /* diagonal portion of A */ 5000 ncols_d = ai[i+1] - ai[i]; 5001 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5002 /* off-diagonal portion of A */ 5003 for (j=jo; j<ncols_o; j++) { 5004 *cam++ = *ba++; bj++; 5005 } 5006 } 5007 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5008 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5009 PetscFunctionReturn(0); 5010 } 5011 5012 #undef __FUNCT__ 5013 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5014 /*@C 5015 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5016 5017 Not Collective 5018 5019 Input Parameters: 5020 + A - the matrix 5021 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5022 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 5023 5024 Output Parameter: 5025 . 
A_loc - the local sequential matrix generated 5026 5027 Level: developer 5028 5029 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5030 5031 @*/ 5032 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5033 { 5034 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5035 PetscErrorCode ierr; 5036 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5037 IS isrowa,iscola; 5038 Mat *aloc; 5039 PetscBool match; 5040 5041 PetscFunctionBegin; 5042 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5043 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5044 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5045 if (!row){ 5046 start = A->rmap->rstart; end = A->rmap->rend; 5047 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5048 } else { 5049 isrowa = *row; 5050 } 5051 if (!col){ 5052 start = A->cmap->rstart; 5053 cmap = a->garray; 5054 nzA = a->A->cmap->n; 5055 nzB = a->B->cmap->n; 5056 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5057 ncols = 0; 5058 for (i=0; i<nzB; i++) { 5059 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5060 else break; 5061 } 5062 imark = i; 5063 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5064 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5065 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5066 } else { 5067 iscola = *col; 5068 } 5069 if (scall != MAT_INITIAL_MATRIX){ 5070 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5071 aloc[0] = *A_loc; 5072 } 5073 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5074 *A_loc = aloc[0]; 5075 ierr = PetscFree(aloc);CHKERRQ(ierr); 5076 if (!row){ 5077 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5078 } 5079 if (!col){ 5080 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5081 } 5082 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5083 PetscFunctionReturn(0); 5084 } 5085 5086 #undef __FUNCT__ 5087 #define __FUNCT__ "MatGetBrowsOfAcols" 5088 /*@C 5089 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5090 5091 Collective on Mat 5092 5093 Input Parameters: 5094 + A,B - the matrices in mpiaij format 5095 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5096 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 5097 5098 Output Parameter: 5099 + rowb, colb - index sets of rows and columns of B to extract 5100 - B_seq - the sequential matrix generated 5101 5102 Level: developer 5103 5104 @*/ 5105 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5106 { 5107 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5108 PetscErrorCode ierr; 5109 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5110 IS isrowb,iscolb; 5111 Mat *bseq=PETSC_NULL; 5112 5113 PetscFunctionBegin; 5114 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5115 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5116 } 5117 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5118 5119 if (scall == MAT_INITIAL_MATRIX){ 5120 start = A->cmap->rstart; 5121 cmap = a->garray; 5122 nzA = a->A->cmap->n; 5123 nzB = a->B->cmap->n; 5124 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5125 ncols = 0; 5126 for (i=0; i<nzB; i++) { /* row < local row index */ 5127 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5128 else break; 5129 } 5130 imark = i; 5131 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5132 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5133 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5134 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5135 } else { 5136 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5137 isrowb = *rowb; iscolb = *colb; 5138 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5139 bseq[0] = *B_seq; 5140 } 5141 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5142 *B_seq = bseq[0]; 5143 ierr = PetscFree(bseq);CHKERRQ(ierr); 5144 if (!rowb){ 5145 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5146 } else { 5147 *rowb = isrowb; 5148 } 5149 if (!colb){ 5150 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5151 } else { 5152 *colb = iscolb; 5153 } 5154 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5155 PetscFunctionReturn(0); 5156 } 5157 5158 #undef __FUNCT__ 5159 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5160 /* 5161 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5162 of the OFF-DIAGONAL portion of local A 5163 5164 Collective on Mat 5165 5166 Input Parameters: 5167 + A,B - the matrices in mpiaij format 5168 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5169 5170 Output Parameter: 5171 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5172 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5173 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 5174 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5175 5176 Level: developer 5177 5178 */ 5179 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5180 { 5181 VecScatter_MPI_General *gen_to,*gen_from; 5182 PetscErrorCode ierr; 5183 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5184 Mat_SeqAIJ *b_oth; 5185 VecScatter ctx=a->Mvctx; 5186 MPI_Comm comm=((PetscObject)ctx)->comm; 5187 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5188 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5189 PetscScalar *rvalues,*svalues; 5190 MatScalar *b_otha,*bufa,*bufA; 5191 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5192 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 5193 MPI_Status *sstatus,rstatus; 5194 PetscMPIInt jj; 5195 PetscInt *cols,sbs,rbs; 5196 PetscScalar *vals; 5197 5198 PetscFunctionBegin; 5199 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5200 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5201 } 5202 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5203 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5204 5205 gen_to = (VecScatter_MPI_General*)ctx->todata; 5206 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5207 rvalues = gen_from->values; /* holds the length of receiving row */ 5208 svalues = gen_to->values; /* holds the length of sending row */ 5209 nrecvs = gen_from->n; 5210 nsends = gen_to->n; 5211 5212 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5213 srow = gen_to->indices; /* local row index to be sent */ 5214 sstarts = gen_to->starts; 5215 sprocs = gen_to->procs; 5216 sstatus = gen_to->sstatus; 5217 sbs = gen_to->bs; 5218 rstarts = gen_from->starts; 5219 rprocs = gen_from->procs; 5220 rbs = gen_from->bs; 5221 5222 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5223 if (scall == MAT_INITIAL_MATRIX){ 5224 /* i-array */ 5225 /*---------*/ 5226 /* post receives */ 5227 for (i=0; i<nrecvs; i++){ 5228 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5229 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5230 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5231 } 5232 5233 /* pack the outgoing message */ 5234 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5235 sstartsj[0] = 0; rstartsj[0] = 0; 5236 len = 0; /* total length of j or a array to be sent */ 5237 k = 0; 5238 for (i=0; i<nsends; i++){ 5239 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5240 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5241 for (j=0; j<nrows; j++) { 5242 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5243 for (l=0; l<sbs; l++){ 5244 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 5245 rowlen[j*sbs+l] = ncols; 5246 len += ncols; 5247 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 5248 } 5249 k++; 5250 } 5251 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5252 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5253 } 5254 /* 
recvs and sends of i-array are completed */ 5255 i = nrecvs; 5256 while (i--) { 5257 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5258 } 5259 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5260 5261 /* allocate buffers for sending j and a arrays */ 5262 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 5263 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 5264 5265 /* create i-array of B_oth */ 5266 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 5267 b_othi[0] = 0; 5268 len = 0; /* total length of j or a array to be received */ 5269 k = 0; 5270 for (i=0; i<nrecvs; i++){ 5271 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5272 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5273 for (j=0; j<nrows; j++) { 5274 b_othi[k+1] = b_othi[k] + rowlen[j]; 5275 len += rowlen[j]; k++; 5276 } 5277 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5278 } 5279 5280 /* allocate space for j and a arrrays of B_oth */ 5281 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 5282 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 5283 5284 /* j-array */ 5285 /*---------*/ 5286 /* post receives of j-array */ 5287 for (i=0; i<nrecvs; i++){ 5288 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5289 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5290 } 5291 5292 /* pack the outgoing message j-array */ 5293 k = 0; 5294 for (i=0; i<nsends; i++){ 5295 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5296 bufJ = bufj+sstartsj[i]; 5297 for (j=0; j<nrows; j++) { 5298 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5299 for (ll=0; ll<sbs; ll++){ 5300 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5301 for (l=0; l<ncols; l++){ 5302 *bufJ++ = cols[l]; 5303 } 5304 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5305 } 5306 } 5307 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5308 } 5309 5310 /* recvs and sends of j-array are completed */ 5311 i = nrecvs; 5312 while (i--) { 5313 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5314 } 5315 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5316 } else if (scall == MAT_REUSE_MATRIX){ 5317 sstartsj = *startsj_s; 5318 rstartsj = *startsj_r; 5319 bufa = *bufa_ptr; 5320 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5321 b_otha = b_oth->a; 5322 } else { 5323 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5324 } 5325 5326 /* a-array */ 5327 /*---------*/ 5328 /* post receives of a-array */ 5329 for (i=0; i<nrecvs; i++){ 5330 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5331 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5332 } 5333 5334 /* pack the outgoing message a-array */ 5335 k = 0; 5336 for (i=0; i<nsends; i++){ 5337 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5338 bufA = bufa+sstartsj[i]; 5339 for (j=0; j<nrows; j++) { 5340 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5341 for (ll=0; ll<sbs; ll++){ 5342 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5343 for (l=0; l<ncols; l++){ 5344 *bufA++ = vals[l]; 5345 } 5346 ierr = 
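/* (The bufa/sstartsj layout used here mirrors the j-array above: the values for all rows
   destined to sprocs[i] are packed contiguously starting at bufa+sstartsj[i], so with,
   say, sstartsj = {0,17,30} the first outgoing message carries 17 values and the second
   carries 13.  Illustrative numbers only.) */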
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5347 } 5348 } 5349 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5350 } 5351 /* recvs and sends of a-array are completed */ 5352 i = nrecvs; 5353 while (i--) { 5354 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5355 } 5356 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5357 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5358 5359 if (scall == MAT_INITIAL_MATRIX){ 5360 /* put together the new matrix */ 5361 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5362 5363 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5364 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5365 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5366 b_oth->free_a = PETSC_TRUE; 5367 b_oth->free_ij = PETSC_TRUE; 5368 b_oth->nonew = 0; 5369 5370 ierr = PetscFree(bufj);CHKERRQ(ierr); 5371 if (!startsj_s || !bufa_ptr){ 5372 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5373 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5374 } else { 5375 *startsj_s = sstartsj; 5376 *startsj_r = rstartsj; 5377 *bufa_ptr = bufa; 5378 } 5379 } 5380 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5381 PetscFunctionReturn(0); 5382 } 5383 5384 #undef __FUNCT__ 5385 #define __FUNCT__ "MatGetCommunicationStructs" 5386 /*@C 5387 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5388 5389 Not Collective 5390 5391 Input Parameters: 5392 . A - The matrix in mpiaij format 5393 5394 Output Parameter: 5395 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5396 . 
colmap - A map from global column index to local index into lvec 5397 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5398 5399 Level: developer 5400 5401 @*/ 5402 #if defined (PETSC_USE_CTABLE) 5403 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5404 #else 5405 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5406 #endif 5407 { 5408 Mat_MPIAIJ *a; 5409 5410 PetscFunctionBegin; 5411 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5412 PetscValidPointer(lvec, 2); 5413 PetscValidPointer(colmap, 3); 5414 PetscValidPointer(multScatter, 4); 5415 a = (Mat_MPIAIJ *) A->data; 5416 if (lvec) *lvec = a->lvec; 5417 if (colmap) *colmap = a->colmap; 5418 if (multScatter) *multScatter = a->Mvctx; 5419 PetscFunctionReturn(0); 5420 } 5421 5422 EXTERN_C_BEGIN 5423 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*); 5424 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*); 5425 extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*); 5426 EXTERN_C_END 5427 5428 #undef __FUNCT__ 5429 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5430 /* 5431 Computes (B'*A')' since computing B*A directly is untenable 5432 5433 n p p 5434 ( ) ( ) ( ) 5435 m ( A ) * n ( B ) = m ( C ) 5436 ( ) ( ) ( ) 5437 5438 */ 5439 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5440 { 5441 PetscErrorCode ierr; 5442 Mat At,Bt,Ct; 5443 5444 PetscFunctionBegin; 5445 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5446 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5447 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5448 ierr = MatDestroy(&At);CHKERRQ(ierr); 5449 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5450 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5451 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5452 PetscFunctionReturn(0); 5453 } 5454 5455 #undef __FUNCT__ 5456 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5457 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5458 { 5459 PetscErrorCode ierr; 5460 PetscInt m=A->rmap->n,n=B->cmap->n; 5461 Mat Cmat; 5462 5463 PetscFunctionBegin; 5464 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5465 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 5466 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5467 ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 5468 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5469 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 5470 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5471 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5472 *C = Cmat; 5473 (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ; 5474 PetscFunctionReturn(0); 5475 } 5476 5477 /* ----------------------------------------------------------------*/ 5478 #undef __FUNCT__ 5479 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5480 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5481 { 5482 PetscErrorCode ierr; 5483 5484 PetscFunctionBegin; 5485 if (scall == MAT_INITIAL_MATRIX){ 5486 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5487 } 5488 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 
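/*
   This MPIDENSE times MPIAIJ product is normally reached through the generic interface once
   the _mpidense_mpiaij entries are composed in MatCreate_MPIAIJ() below.  A minimal usage
   sketch (A an MPIDENSE matrix, B an MPIAIJ matrix; the names are hypothetical):

       Mat C;
       ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);

   Because it is implemented as (B'*A')', as sketched in the comment above the numeric
   routine, it transposes and copies both operands.
*/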
5489 PetscFunctionReturn(0); 5490 } 5491 5492 EXTERN_C_BEGIN 5493 #if defined(PETSC_HAVE_MUMPS) 5494 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5495 #endif 5496 #if defined(PETSC_HAVE_PASTIX) 5497 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5498 #endif 5499 #if defined(PETSC_HAVE_SUPERLU_DIST) 5500 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5501 #endif 5502 #if defined(PETSC_HAVE_SPOOLES) 5503 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 5504 #endif 5505 EXTERN_C_END 5506 5507 /*MC 5508 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5509 5510 Options Database Keys: 5511 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5512 5513 Level: beginner 5514 5515 .seealso: MatCreateAIJ() 5516 M*/ 5517 5518 EXTERN_C_BEGIN 5519 #undef __FUNCT__ 5520 #define __FUNCT__ "MatCreate_MPIAIJ" 5521 PetscErrorCode MatCreate_MPIAIJ(Mat B) 5522 { 5523 Mat_MPIAIJ *b; 5524 PetscErrorCode ierr; 5525 PetscMPIInt size; 5526 5527 PetscFunctionBegin; 5528 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5529 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5530 B->data = (void*)b; 5531 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5532 B->assembled = PETSC_FALSE; 5533 B->insertmode = NOT_SET_VALUES; 5534 b->size = size; 5535 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5536 5537 /* build cache for off array entries formed */ 5538 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5539 b->donotstash = PETSC_FALSE; 5540 b->colmap = 0; 5541 b->garray = 0; 5542 b->roworiented = PETSC_TRUE; 5543 5544 /* stuff used for matrix vector multiply */ 5545 b->lvec = PETSC_NULL; 5546 b->Mvctx = PETSC_NULL; 5547 5548 /* stuff for MatGetRow() */ 5549 b->rowindices = 0; 5550 b->rowvalues = 0; 5551 b->getrowactive = PETSC_FALSE; 5552 5553 /* flexible pointer used in CUSP/CUSPARSE classes */ 5554 b->spptr = PETSC_NULL; 5555 #if defined(PETSC_HAVE_SPOOLES) 5556 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 5557 "MatGetFactor_mpiaij_spooles", 5558 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5559 #endif 5560 #if defined(PETSC_HAVE_MUMPS) 5561 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5562 "MatGetFactor_aij_mumps", 5563 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5564 #endif 5565 #if defined(PETSC_HAVE_PASTIX) 5566 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5567 "MatGetFactor_mpiaij_pastix", 5568 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5569 #endif 5570 #if defined(PETSC_HAVE_SUPERLU_DIST) 5571 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5572 "MatGetFactor_mpiaij_superlu_dist", 5573 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5574 #endif 5575 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5576 "MatStoreValues_MPIAIJ", 5577 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5578 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5579 "MatRetrieveValues_MPIAIJ", 5580 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5581 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5582 "MatGetDiagonalBlock_MPIAIJ", 5583 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5584 ierr = 
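/*
   Typical user-level creation of a matrix of this type (illustrative sketch only; the sizes
   and preallocation counts are made up):

       Mat            A;
       PetscErrorCode ierr;
       ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
       ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
       ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);       // or -mat_type mpiaij via MatSetFromOptions()
       ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);

   which ends up in this constructor and in the preallocation routine composed below.
*/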
PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5585 "MatIsTranspose_MPIAIJ", 5586 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5587 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5588 "MatMPIAIJSetPreallocation_MPIAIJ", 5589 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5590 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5591 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5592 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5593 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5594 "MatDiagonalScaleLocal_MPIAIJ", 5595 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5596 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C", 5597 "MatConvert_MPIAIJ_MPIAIJPERM", 5598 MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5599 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C", 5600 "MatConvert_MPIAIJ_MPIAIJCRL", 5601 MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5602 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5603 "MatConvert_MPIAIJ_MPISBAIJ", 5604 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5605 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5606 "MatMatMult_MPIDense_MPIAIJ", 5607 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5608 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5609 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5610 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5611 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5612 "MatMatMultNumeric_MPIDense_MPIAIJ", 5613 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5614 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5615 PetscFunctionReturn(0); 5616 } 5617 EXTERN_C_END 5618 5619 #undef __FUNCT__ 5620 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5621 /*@ 5622 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5623 and "off-diagonal" part of the matrix in CSR format. 5624 5625 Collective on MPI_Comm 5626 5627 Input Parameters: 5628 + comm - MPI communicator 5629 . m - number of local rows (Cannot be PETSC_DECIDE) 5630 . n - This value should be the same as the local size used in creating the 5631 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5632 calculated if N is given) For square matrices n is almost always m. 5633 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5634 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5635 . i - row indices for "diagonal" portion of matrix 5636 . j - column indices 5637 . a - matrix values 5638 . oi - row indices for "off-diagonal" portion of matrix 5639 . oj - column indices 5640 - oa - matrix values 5641 5642 Output Parameter: 5643 . mat - the matrix 5644 5645 Level: advanced 5646 5647 Notes: 5648 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5649 must free the arrays once the matrix has been destroyed and not before. 5650 5651 The i and j indices are 0 based 5652 5653 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5654 5655 This sets local rows and cannot be used to set off-processor values. 
5656 5657 You cannot later use MatSetValues() to change values in this matrix. 5658 5659 .keywords: matrix, aij, compressed row, sparse, parallel 5660 5661 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5662 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5663 @*/ 5664 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5665 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5666 { 5667 PetscErrorCode ierr; 5668 Mat_MPIAIJ *maij; 5669 5670 PetscFunctionBegin; 5671 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5672 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5673 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5674 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5675 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5676 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5677 maij = (Mat_MPIAIJ*) (*mat)->data; 5678 maij->donotstash = PETSC_TRUE; 5679 (*mat)->preallocated = PETSC_TRUE; 5680 5681 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5682 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5683 5684 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5685 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5686 5687 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5688 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5689 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5690 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5691 5692 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5693 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5694 PetscFunctionReturn(0); 5695 } 5696 5697 /* 5698 Special version for direct calls from Fortran 5699 */ 5700 #include <petsc-private/fortranimpl.h> 5701 5702 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5703 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5704 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5705 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5706 #endif 5707 5708 /* Change these macros so can be used in void function */ 5709 #undef CHKERRQ 5710 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5711 #undef SETERRQ2 5712 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5713 #undef SETERRQ3 5714 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5715 #undef SETERRQ 5716 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5717 5718 EXTERN_C_BEGIN 5719 #undef __FUNCT__ 5720 #define __FUNCT__ "matsetvaluesmpiaij_" 5721 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5722 { 5723 Mat mat = *mmat; 5724 PetscInt m = *mm, n = *mn; 5725 InsertMode addv = *maddv; 5726 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5727 PetscScalar value; 5728 PetscErrorCode ierr; 5729 5730 MatCheckPreallocated(mat,1); 5731 if (mat->insertmode == NOT_SET_VALUES) { 5732 mat->insertmode = addv; 5733 } 5734 #if defined(PETSC_USE_DEBUG) 5735 else if (mat->insertmode != addv) { 5736 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5737 } 5738 #endif 5739 { 5740 PetscInt 
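/*
   Column-mapping example for the loop below (illustrative numbers only): on a process owning
   global columns cstart = 100 up to cend = 200, a global column in[j] = 150 is routed into the
   diagonal block A with local column 150 - 100 = 50, while in[j] = 7 falls outside [cstart,cend)
   and goes to the off-diagonal block B, whose local column index is looked up through
   aij->colmap (or the PetscTable variant when PETSC_USE_CTABLE is defined).
*/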
i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5741 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5742 PetscBool roworiented = aij->roworiented; 5743 5744 /* Some Variables required in the macro */ 5745 Mat A = aij->A; 5746 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5747 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5748 MatScalar *aa = a->a; 5749 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5750 Mat B = aij->B; 5751 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5752 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5753 MatScalar *ba = b->a; 5754 5755 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5756 PetscInt nonew = a->nonew; 5757 MatScalar *ap1,*ap2; 5758 5759 PetscFunctionBegin; 5760 for (i=0; i<m; i++) { 5761 if (im[i] < 0) continue; 5762 #if defined(PETSC_USE_DEBUG) 5763 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5764 #endif 5765 if (im[i] >= rstart && im[i] < rend) { 5766 row = im[i] - rstart; 5767 lastcol1 = -1; 5768 rp1 = aj + ai[row]; 5769 ap1 = aa + ai[row]; 5770 rmax1 = aimax[row]; 5771 nrow1 = ailen[row]; 5772 low1 = 0; 5773 high1 = nrow1; 5774 lastcol2 = -1; 5775 rp2 = bj + bi[row]; 5776 ap2 = ba + bi[row]; 5777 rmax2 = bimax[row]; 5778 nrow2 = bilen[row]; 5779 low2 = 0; 5780 high2 = nrow2; 5781 5782 for (j=0; j<n; j++) { 5783 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5784 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5785 if (in[j] >= cstart && in[j] < cend){ 5786 col = in[j] - cstart; 5787 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5788 } else if (in[j] < 0) continue; 5789 #if defined(PETSC_USE_DEBUG) 5790 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5791 #endif 5792 else { 5793 if (mat->was_assembled) { 5794 if (!aij->colmap) { 5795 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5796 } 5797 #if defined (PETSC_USE_CTABLE) 5798 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5799 col--; 5800 #else 5801 col = aij->colmap[in[j]] - 1; 5802 #endif 5803 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5804 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5805 col = in[j]; 5806 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5807 B = aij->B; 5808 b = (Mat_SeqAIJ*)B->data; 5809 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5810 rp2 = bj + bi[row]; 5811 ap2 = ba + bi[row]; 5812 rmax2 = bimax[row]; 5813 nrow2 = bilen[row]; 5814 low2 = 0; 5815 high2 = nrow2; 5816 bm = aij->B->rmap->n; 5817 ba = b->a; 5818 } 5819 } else col = in[j]; 5820 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5821 } 5822 } 5823 } else { 5824 if (!aij->donotstash) { 5825 if (roworiented) { 5826 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5827 } else { 5828 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5829 } 5830 } 5831 } 5832 }} 5833 PetscFunctionReturnVoid(); 5834 } 5835 EXTERN_C_END 5836 5837
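/*
   Illustrative sketch of MatCreateMPIAIJWithSplitArrays(), documented above.  The CSR arrays
   below describe a hypothetical local piece with 2 rows on the rank owning global columns 0..1
   of a matrix with 4 global columns (the other rank would supply its own arrays); the numbers
   are not taken from any real code.  As the MatCreateSeqAIJWithArrays() calls in that routine
   suggest, j indexes columns within the diagonal block while oj holds global column indices.

       PetscInt       i[]  = {0,1,2},  j[]  = {0,1};      // diagonal block: entries (0,0) and (1,1)
       PetscScalar    a[]  = {1.0,2.0};
       PetscInt       oi[] = {0,1,2},  oj[] = {2,3};      // off-diagonal block: global columns 2 and 3
       PetscScalar    oa[] = {-1.0,-2.0};
       Mat            A;
       PetscErrorCode ierr;
       ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,PETSC_DETERMINE,4,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);

   The arrays must remain allocated until A is destroyed, as noted in the manual page.
*/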