1 /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp bsmith $*/ 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" 8 #include "src/vec/vecimpl.h" 9 #include "src/inline/spops.h" 10 #include "petscsys.h" /*I "petscmat.h" I*/ 11 12 /* 13 Special version for Fun3d sequential benchmark 14 */ 15 #if defined(PETSC_HAVE_FORTRAN_CAPS) 16 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 17 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 18 #define matsetvaluesblocked4_ matsetvaluesblocked4 19 #endif 20 21 EXTERN_C_BEGIN 22 #undef __FUNCT__ 23 #define __FUNCT__ "matsetvaluesblocked4_" 24 void matsetvaluesblocked4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v) 25 { 26 Mat A = *AA; 27 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 28 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 29 int *ai=a->i,*ailen=a->ilen; 30 int *aj=a->j,stepval; 31 PetscScalar *value = v; 32 MatScalar *ap,*aa = a->a,*bap; 33 34 PetscFunctionBegin; 35 stepval = (n-1)*4; 36 for (k=0; k<m; k++) { /* loop over added rows */ 37 row = im[k]; 38 rp = aj + ai[row]; 39 ap = aa + 16*ai[row]; 40 nrow = ailen[row]; 41 low = 0; 42 for (l=0; l<n; l++) { /* loop over added columns */ 43 col = in[l]; 44 value = v + k*(stepval+4)*4 + l*4; 45 low = 0; high = nrow; 46 while (high-low > 7) { 47 t = (low+high)/2; 48 if (rp[t] > col) high = t; 49 else low = t; 50 } 51 for (i=low; i<high; i++) { 52 if (rp[i] > col) break; 53 if (rp[i] == col) { 54 bap = ap + 16*i; 55 for (ii=0; ii<4; ii++,value+=stepval) { 56 for (jj=ii; jj<16; jj+=4) { 57 bap[jj] += *value++; 58 } 59 } 60 goto noinsert2; 61 } 62 } 63 N = nrow++ - 1; 64 /* shift up all the later entries in this row */ 65 for (ii=N; ii>=i; ii--) { 66 rp[ii+1] = rp[ii]; 67 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 68 } 69 if (N >= i) { 70 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 71 } 72 rp[i] = col; 73 bap = ap + 16*i; 74 for (ii=0; ii<4; ii++,value+=stepval) { 75 for (jj=ii; jj<16; jj+=4) { 76 bap[jj] = *value++; 77 } 78 } 79 noinsert2:; 80 low = i; 81 } 82 ailen[row] = nrow; 83 } 84 } 85 EXTERN_C_END 86 87 #if defined(PETSC_HAVE_FORTRAN_CAPS) 88 #define matsetvalues4_ MATSETVALUES4 89 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 90 #define matsetvalues4_ matsetvalues4 91 #endif 92 93 EXTERN_C_BEGIN 94 #undef __FUNCT__ 95 #define __FUNCT__ "MatSetValues4_" 96 void matsetvalues4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v) 97 { 98 Mat A = *AA; 99 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 100 int *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 101 int *ai=a->i,*ailen=a->ilen; 102 int *aj=a->j,brow,bcol; 103 int ridx,cidx; 104 MatScalar *ap,value,*aa=a->a,*bap; 105 106 PetscFunctionBegin; 107 for (k=0; k<m; k++) { /* loop over added rows */ 108 row = im[k]; brow = row/4; 109 rp = aj + ai[brow]; 110 ap = aa + 16*ai[brow]; 111 nrow = ailen[brow]; 112 low = 0; 113 for (l=0; l<n; l++) { /* loop over added columns */ 114 col = in[l]; bcol = col/4; 115 ridx = row % 4; cidx = col % 4; 116 value = v[l + k*n]; 117 low = 0; high = nrow; 118 while (high-low > 7) { 119 t = (low+high)/2; 120 if (rp[t] > bcol) high = t; 121 else low = t; 122 } 123 for (i=low; i<high; i++) { 124 if (rp[i] > bcol) break; 125 if (rp[i] == bcol) { 126 bap = ap + 16*i + 4*cidx + ridx; 127 *bap += value; 128 goto noinsert1; 129 } 130 } 131 N = nrow++ - 1; 132 /* shift up all the later entries in this row */ 133 for (ii=N; ii>=i; ii--) { 134 rp[ii+1] = rp[ii]; 135 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 136 } 137 if (N>=i) { 138 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 139 } 140 rp[i] = bcol; 141 ap[16*i + 4*cidx + ridx] = value; 142 noinsert1:; 143 low = i; 144 } 145 ailen[brow] = nrow; 146 } 147 } 148 EXTERN_C_END 149 150 /* UGLY, ugly, ugly 151 When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 152 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 153 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 154 converts the entries into single precision and then calls ..._MatScalar() to put them 155 into the single precision data structures. 156 */ 157 #if defined(PETSC_USE_MAT_SINGLE) 158 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 159 #else 160 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 161 #endif 162 #if defined(PETSC_HAVE_DSCPACK) 163 EXTERN int MatUseDSCPACK_MPIBAIJ(Mat); 164 #endif 165 166 #define CHUNKSIZE 10 167 168 /* 169 Checks for missing diagonals 170 */ 171 #undef __FUNCT__ 172 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 173 int MatMissingDiagonal_SeqBAIJ(Mat A) 174 { 175 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 176 int *diag,*jj = a->j,i,ierr; 177 178 PetscFunctionBegin; 179 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 180 diag = a->diag; 181 for (i=0; i<a->mbs; i++) { 182 if (jj[diag[i]] != i) { 183 SETERRQ1(1,"Matrix is missing diagonal number %d",i); 184 } 185 } 186 PetscFunctionReturn(0); 187 } 188 189 #undef __FUNCT__ 190 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 191 int MatMarkDiagonal_SeqBAIJ(Mat A) 192 { 193 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 194 int i,j,*diag,m = a->mbs,ierr; 195 196 PetscFunctionBegin; 197 if (a->diag) PetscFunctionReturn(0); 198 199 ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr); 200 PetscLogObjectMemory(A,(m+1)*sizeof(int)); 201 for (i=0; i<m; i++) { 202 diag[i] = a->i[i+1]; 203 for (j=a->i[i]; j<a->i[i+1]; j++) { 204 if (a->j[j] == i) { 205 diag[i] = j; 206 break; 207 } 208 } 209 } 210 a->diag = diag; 211 PetscFunctionReturn(0); 212 } 213 214 215 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 216 217 #undef __FUNCT__ 218 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 219 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 220 { 221 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 222 int ierr,n = a->mbs,i; 223 224 PetscFunctionBegin; 225 *nn = n; 226 if (!ia) PetscFunctionReturn(0); 227 if (symmetric) { 228 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 229 } else if (oshift == 1) { 230 /* temporarily add 1 to i and j indices */ 231 int nz = a->i[n]; 232 for (i=0; i<nz; i++) a->j[i]++; 233 for (i=0; i<n+1; i++) a->i[i]++; 234 *ia = a->i; *ja = a->j; 235 } else { 236 *ia = a->i; *ja = a->j; 237 } 238 239 PetscFunctionReturn(0); 240 } 241 242 #undef __FUNCT__ 243 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 244 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 245 { 246 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 247 int i,n = a->mbs,ierr; 248 249 PetscFunctionBegin; 250 if (!ia) PetscFunctionReturn(0); 251 if (symmetric) { 252 ierr = PetscFree(*ia);CHKERRQ(ierr); 253 ierr = PetscFree(*ja);CHKERRQ(ierr); 254 } else if (oshift == 1) { 255 int nz = a->i[n]-1; 256 for (i=0; i<nz; i++) a->j[i]--; 257 for (i=0; i<n+1; i++) a->i[i]--; 258 } 259 PetscFunctionReturn(0); 260 } 261 262 #undef __FUNCT__ 263 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ" 264 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs) 265 { 266 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 267 268 PetscFunctionBegin; 269 *bs = baij->bs; 270 PetscFunctionReturn(0); 271 } 272 273 274 #undef __FUNCT__ 275 #define __FUNCT__ "MatDestroy_SeqBAIJ" 276 int MatDestroy_SeqBAIJ(Mat A) 277 { 278 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 279 int ierr; 280 281 PetscFunctionBegin; 282 #if defined(PETSC_USE_LOG) 283 PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz); 284 #endif 285 ierr = PetscFree(a->a);CHKERRQ(ierr); 286 if (!a->singlemalloc) { 287 ierr = PetscFree(a->i);CHKERRQ(ierr); 288 ierr = PetscFree(a->j);CHKERRQ(ierr); 289 } 290 if (a->row) { 291 ierr = ISDestroy(a->row);CHKERRQ(ierr); 292 } 293 if (a->col) { 294 ierr = ISDestroy(a->col);CHKERRQ(ierr); 295 } 296 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 297 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 298 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 299 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 300 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 301 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 302 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 303 #if defined(PETSC_USE_MAT_SINGLE) 304 if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);} 305 #endif 306 ierr = PetscFree(a);CHKERRQ(ierr); 307 PetscFunctionReturn(0); 308 } 309 310 #undef __FUNCT__ 311 #define __FUNCT__ "MatSetOption_SeqBAIJ" 312 int MatSetOption_SeqBAIJ(Mat A,MatOption op) 313 { 314 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 315 316 PetscFunctionBegin; 317 switch (op) { 318 case MAT_ROW_ORIENTED: 319 a->roworiented = PETSC_TRUE; 320 break; 321 case MAT_COLUMN_ORIENTED: 322 a->roworiented = PETSC_FALSE; 323 break; 324 case MAT_COLUMNS_SORTED: 325 a->sorted = PETSC_TRUE; 326 break; 327 case MAT_COLUMNS_UNSORTED: 328 a->sorted = PETSC_FALSE; 329 break; 330 case MAT_KEEP_ZEROED_ROWS: 331 a->keepzeroedrows = PETSC_TRUE; 332 break; 333 case MAT_NO_NEW_NONZERO_LOCATIONS: 334 a->nonew = 1; 335 break; 336 case MAT_NEW_NONZERO_LOCATION_ERR: 337 a->nonew = -1; 338 break; 339 case MAT_NEW_NONZERO_ALLOCATION_ERR: 340 a->nonew = -2; 341 break; 342 case MAT_YES_NEW_NONZERO_LOCATIONS: 343 a->nonew = 0; 344 break; 345 case MAT_ROWS_SORTED: 346 case MAT_ROWS_UNSORTED: 347 case MAT_YES_NEW_DIAGONALS: 348 case MAT_IGNORE_OFF_PROC_ENTRIES: 349 case MAT_USE_HASH_TABLE: 350 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n"); 351 break; 352 case MAT_NO_NEW_DIAGONALS: 353 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 354 break; 355 default: 356 SETERRQ(PETSC_ERR_SUP,"unknown option"); 357 } 358 PetscFunctionReturn(0); 359 } 360 361 #undef __FUNCT__ 362 #define __FUNCT__ "MatGetRow_SeqBAIJ" 363 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 364 { 365 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 366 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr; 367 MatScalar *aa,*aa_i; 368 PetscScalar *v_i; 369 370 PetscFunctionBegin; 371 bs = a->bs; 372 ai = a->i; 373 aj = a->j; 374 aa = a->a; 375 bs2 = a->bs2; 376 377 if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range"); 378 379 bn = row/bs; /* Block number */ 380 bp = row % bs; /* Block Position */ 381 M = ai[bn+1] - ai[bn]; 382 *nz = bs*M; 383 384 if (v) { 385 *v = 0; 386 if (*nz) { 387 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 388 for (i=0; i<M; i++) { /* for each block in the block row */ 389 v_i = *v + i*bs; 390 aa_i = aa + bs2*(ai[bn] + i); 391 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 392 } 393 } 394 } 395 396 if (idx) { 397 *idx = 0; 398 if (*nz) { 399 ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr); 400 for (i=0; i<M; i++) { /* for each block in the block row */ 401 idx_i = *idx + i*bs; 402 itmp = bs*aj[ai[bn] + i]; 403 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 404 } 405 } 406 } 407 PetscFunctionReturn(0); 408 } 409 410 #undef __FUNCT__ 411 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 412 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 413 { 414 int ierr; 415 416 PetscFunctionBegin; 417 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 418 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 419 PetscFunctionReturn(0); 420 } 421 422 #undef __FUNCT__ 423 #define __FUNCT__ "MatTranspose_SeqBAIJ" 424 int MatTranspose_SeqBAIJ(Mat A,Mat *B) 425 { 426 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 427 Mat C; 428 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 429 int *rows,*cols,bs2=a->bs2; 430 PetscScalar *array; 431 432 PetscFunctionBegin; 433 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 434 ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr); 435 ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr); 436 437 #if defined(PETSC_USE_MAT_SINGLE) 438 ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 439 for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 440 #else 441 array = a->a; 442 #endif 443 444 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 445 ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr); 446 ierr = PetscFree(col);CHKERRQ(ierr); 447 ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr); 448 cols = rows + bs; 449 for (i=0; i<mbs; i++) { 450 cols[0] = i*bs; 451 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 452 len = ai[i+1] - ai[i]; 453 for (j=0; j<len; j++) { 454 rows[0] = (*aj++)*bs; 455 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 456 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 457 array += bs2; 458 } 459 } 460 ierr = PetscFree(rows);CHKERRQ(ierr); 461 #if defined(PETSC_USE_MAT_SINGLE) 462 ierr = PetscFree(array); 463 #endif 464 465 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 466 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 467 468 if (B) { 469 *B = C; 470 } else { 471 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 472 } 473 PetscFunctionReturn(0); 474 } 475 476 #undef __FUNCT__ 477 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 478 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 479 { 480 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 481 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 482 PetscScalar *aa; 483 FILE *file; 484 485 PetscFunctionBegin; 486 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 487 ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 488 col_lens[0] = MAT_FILE_COOKIE; 489 490 col_lens[1] = A->m; 491 col_lens[2] = A->n; 492 col_lens[3] = a->nz*bs2; 493 494 /* store lengths of each row and write (including header) to file */ 495 count = 0; 496 for (i=0; i<a->mbs; i++) { 497 for (j=0; j<bs; j++) { 498 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 499 } 500 } 501 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 502 ierr = PetscFree(col_lens);CHKERRQ(ierr); 503 504 /* store column indices (zero start index) */ 505 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 506 count = 0; 507 for (i=0; i<a->mbs; i++) { 508 for (j=0; j<bs; j++) { 509 for (k=a->i[i]; k<a->i[i+1]; k++) { 510 for (l=0; l<bs; l++) { 511 jj[count++] = bs*a->j[k] + l; 512 } 513 } 514 } 515 } 516 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr); 517 ierr = PetscFree(jj);CHKERRQ(ierr); 518 519 /* store nonzero values */ 520 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 521 count = 0; 522 for (i=0; i<a->mbs; i++) { 523 for (j=0; j<bs; j++) { 524 for (k=a->i[i]; k<a->i[i+1]; k++) { 525 for (l=0; l<bs; l++) { 526 aa[count++] = a->a[bs2*k + l*bs + j]; 527 } 528 } 529 } 530 } 531 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr); 532 ierr = PetscFree(aa);CHKERRQ(ierr); 533 534 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 535 if (file) { 536 fprintf(file,"-matload_block_size %d\n",a->bs); 537 } 538 PetscFunctionReturn(0); 539 } 540 541 extern int MatMPIBAIJFactorInfo_DSCPACK(Mat,PetscViewer); 542 543 #undef __FUNCT__ 544 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 545 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 546 { 547 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 548 int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 549 PetscViewerFormat format; 550 551 PetscFunctionBegin; 552 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 553 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 554 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 555 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 556 Mat aij; 557 ierr = MatConvert(A,MATSEQAIJ,&aij);CHKERRQ(ierr); 558 ierr = MatView(aij,viewer);CHKERRQ(ierr); 559 ierr = MatDestroy(aij);CHKERRQ(ierr); 560 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 561 #if defined(PETSC_HAVE_DSCPACK) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX) 562 ierr = MatMPIBAIJFactorInfo_DSCPACK(A,viewer);CHKERRQ(ierr); 563 #endif 564 PetscFunctionReturn(0); 565 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 566 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 567 for (i=0; i<a->mbs; i++) { 568 for (j=0; j<bs; j++) { 569 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 570 for (k=a->i[i]; k<a->i[i+1]; k++) { 571 for (l=0; l<bs; l++) { 572 #if defined(PETSC_USE_COMPLEX) 573 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 574 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %gi) ",bs*a->j[k]+l, 575 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 576 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 577 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %gi) ",bs*a->j[k]+l, 578 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 579 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 580 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 581 } 582 #else 583 if (a->a[bs2*k + l*bs + j] != 0.0) { 584 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 585 } 586 #endif 587 } 588 } 589 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 590 } 591 } 592 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 593 } else { 594 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 595 for (i=0; i<a->mbs; i++) { 596 for (j=0; j<bs; j++) { 597 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 598 for (k=a->i[i]; k<a->i[i+1]; k++) { 599 for (l=0; l<bs; l++) { 600 #if defined(PETSC_USE_COMPLEX) 601 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 602 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 603 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 604 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 605 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 606 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 607 } else { 608 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 609 } 610 #else 611 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 612 #endif 613 } 614 } 615 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 616 } 617 } 618 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 619 } 620 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 621 PetscFunctionReturn(0); 622 } 623 624 #undef __FUNCT__ 625 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 626 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 627 { 628 Mat A = (Mat) Aa; 629 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 630 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2; 631 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 632 MatScalar *aa; 633 PetscViewer viewer; 634 635 PetscFunctionBegin; 636 637 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 638 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 639 640 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 641 642 /* loop over matrix elements drawing boxes */ 643 color = PETSC_DRAW_BLUE; 644 for (i=0,row=0; i<mbs; i++,row+=bs) { 645 for (j=a->i[i]; j<a->i[i+1]; j++) { 646 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 647 x_l = a->j[j]*bs; x_r = x_l + 1.0; 648 aa = a->a + j*bs2; 649 for (k=0; k<bs; k++) { 650 for (l=0; l<bs; l++) { 651 if (PetscRealPart(*aa++) >= 0.) continue; 652 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 653 } 654 } 655 } 656 } 657 color = PETSC_DRAW_CYAN; 658 for (i=0,row=0; i<mbs; i++,row+=bs) { 659 for (j=a->i[i]; j<a->i[i+1]; j++) { 660 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 661 x_l = a->j[j]*bs; x_r = x_l + 1.0; 662 aa = a->a + j*bs2; 663 for (k=0; k<bs; k++) { 664 for (l=0; l<bs; l++) { 665 if (PetscRealPart(*aa++) != 0.) continue; 666 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 667 } 668 } 669 } 670 } 671 672 color = PETSC_DRAW_RED; 673 for (i=0,row=0; i<mbs; i++,row+=bs) { 674 for (j=a->i[i]; j<a->i[i+1]; j++) { 675 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 676 x_l = a->j[j]*bs; x_r = x_l + 1.0; 677 aa = a->a + j*bs2; 678 for (k=0; k<bs; k++) { 679 for (l=0; l<bs; l++) { 680 if (PetscRealPart(*aa++) <= 0.) continue; 681 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 682 } 683 } 684 } 685 } 686 PetscFunctionReturn(0); 687 } 688 689 #undef __FUNCT__ 690 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 691 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 692 { 693 int ierr; 694 PetscReal xl,yl,xr,yr,w,h; 695 PetscDraw draw; 696 PetscTruth isnull; 697 698 PetscFunctionBegin; 699 700 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 701 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 702 703 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 704 xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0; 705 xr += w; yr += h; xl = -w; yl = -h; 706 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 707 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 708 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 709 PetscFunctionReturn(0); 710 } 711 712 #undef __FUNCT__ 713 #define __FUNCT__ "MatView_SeqBAIJ" 714 int MatView_SeqBAIJ(Mat A,PetscViewer viewer) 715 { 716 int ierr; 717 PetscTruth issocket,isascii,isbinary,isdraw; 718 719 PetscFunctionBegin; 720 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 721 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 722 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 723 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 724 if (issocket) { 725 SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported"); 726 } else if (isascii){ 727 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 728 } else if (isbinary) { 729 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 730 } else if (isdraw) { 731 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 732 } else { 733 SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name); 734 } 735 PetscFunctionReturn(0); 736 } 737 738 739 #undef __FUNCT__ 740 #define __FUNCT__ "MatGetValues_SeqBAIJ" 741 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v) 742 { 743 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 744 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 745 int *ai = a->i,*ailen = a->ilen; 746 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 747 MatScalar *ap,*aa = a->a,zero = 0.0; 748 749 PetscFunctionBegin; 750 for (k=0; k<m; k++) { /* loop over rows */ 751 row = im[k]; brow = row/bs; 752 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 753 if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 754 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 755 nrow = ailen[brow]; 756 for (l=0; l<n; l++) { /* loop over columns */ 757 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 758 if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 759 col = in[l] ; 760 bcol = col/bs; 761 cidx = col%bs; 762 ridx = row%bs; 763 high = nrow; 764 low = 0; /* assume unsorted */ 765 while (high-low > 5) { 766 t = (low+high)/2; 767 if (rp[t] > bcol) high = t; 768 else low = t; 769 } 770 for (i=low; i<high; i++) { 771 if (rp[i] > bcol) break; 772 if (rp[i] == bcol) { 773 *v++ = ap[bs2*i+bs*cidx+ridx]; 774 goto finished; 775 } 776 } 777 *v++ = zero; 778 finished:; 779 } 780 } 781 PetscFunctionReturn(0); 782 } 783 784 #if defined(PETSC_USE_MAT_SINGLE) 785 #undef __FUNCT__ 786 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 787 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 788 { 789 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 790 int ierr,i,N = m*n*b->bs2; 791 MatScalar *vsingle; 792 793 PetscFunctionBegin; 794 if (N > b->setvalueslen) { 795 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 796 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 797 b->setvalueslen = N; 798 } 799 vsingle = b->setvaluescopy; 800 for (i=0; i<N; i++) { 801 vsingle[i] = v[i]; 802 } 803 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 804 PetscFunctionReturn(0); 805 } 806 #endif 807 808 809 #undef __FUNCT__ 810 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 811 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,int *im,int n,int *in,MatScalar *v,InsertMode is) 812 { 813 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 814 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 815 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 816 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 817 PetscTruth roworiented=a->roworiented; 818 MatScalar *value = v,*ap,*aa = a->a,*bap; 819 820 PetscFunctionBegin; 821 if (roworiented) { 822 stepval = (n-1)*bs; 823 } else { 824 stepval = (m-1)*bs; 825 } 826 for (k=0; k<m; k++) { /* loop over added rows */ 827 row = im[k]; 828 if (row < 0) continue; 829 #if defined(PETSC_USE_BOPT_g) 830 if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 831 #endif 832 rp = aj + ai[row]; 833 ap = aa + bs2*ai[row]; 834 rmax = imax[row]; 835 nrow = ailen[row]; 836 low = 0; 837 for (l=0; l<n; l++) { /* loop over added columns */ 838 if (in[l] < 0) continue; 839 #if defined(PETSC_USE_BOPT_g) 840 if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 841 #endif 842 col = in[l]; 843 if (roworiented) { 844 value = v + k*(stepval+bs)*bs + l*bs; 845 } else { 846 value = v + l*(stepval+bs)*bs + k*bs; 847 } 848 if (!sorted) low = 0; high = nrow; 849 while (high-low > 7) { 850 t = (low+high)/2; 851 if (rp[t] > col) high = t; 852 else low = t; 853 } 854 for (i=low; i<high; i++) { 855 if (rp[i] > col) break; 856 if (rp[i] == col) { 857 bap = ap + bs2*i; 858 if (roworiented) { 859 if (is == ADD_VALUES) { 860 for (ii=0; ii<bs; ii++,value+=stepval) { 861 for (jj=ii; jj<bs2; jj+=bs) { 862 bap[jj] += *value++; 863 } 864 } 865 } else { 866 for (ii=0; ii<bs; ii++,value+=stepval) { 867 for (jj=ii; jj<bs2; jj+=bs) { 868 bap[jj] = *value++; 869 } 870 } 871 } 872 } else { 873 if (is == ADD_VALUES) { 874 for (ii=0; ii<bs; ii++,value+=stepval) { 875 for (jj=0; jj<bs; jj++) { 876 *bap++ += *value++; 877 } 878 } 879 } else { 880 for (ii=0; ii<bs; ii++,value+=stepval) { 881 for (jj=0; jj<bs; jj++) { 882 *bap++ = *value++; 883 } 884 } 885 } 886 } 887 goto noinsert2; 888 } 889 } 890 if (nonew == 1) goto noinsert2; 891 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 892 if (nrow >= rmax) { 893 /* there is no extra room in row, therefore enlarge */ 894 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 895 MatScalar *new_a; 896 897 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 898 899 /* malloc new storage space */ 900 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 901 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 902 new_j = (int*)(new_a + bs2*new_nz); 903 new_i = new_j + new_nz; 904 905 /* copy over old data into new slots */ 906 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 907 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 908 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 909 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 910 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 911 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 912 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 913 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 914 /* free up old matrix storage */ 915 ierr = PetscFree(a->a);CHKERRQ(ierr); 916 if (!a->singlemalloc) { 917 ierr = PetscFree(a->i);CHKERRQ(ierr); 918 ierr = PetscFree(a->j);CHKERRQ(ierr); 919 } 920 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 921 a->singlemalloc = PETSC_TRUE; 922 923 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 924 rmax = imax[row] = imax[row] + CHUNKSIZE; 925 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 926 a->maxnz += bs2*CHUNKSIZE; 927 a->reallocs++; 928 a->nz++; 929 } 930 N = nrow++ - 1; 931 /* shift up all the later entries in this row */ 932 for (ii=N; ii>=i; ii--) { 933 rp[ii+1] = rp[ii]; 934 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 935 } 936 if (N >= i) { 937 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 938 } 939 rp[i] = col; 940 bap = ap + bs2*i; 941 if (roworiented) { 942 for (ii=0; ii<bs; ii++,value+=stepval) { 943 for (jj=ii; jj<bs2; jj+=bs) { 944 bap[jj] = *value++; 945 } 946 } 947 } else { 948 for (ii=0; ii<bs; ii++,value+=stepval) { 949 for (jj=0; jj<bs; jj++) { 950 *bap++ = *value++; 951 } 952 } 953 } 954 noinsert2:; 955 low = i; 956 } 957 ailen[row] = nrow; 958 } 959 PetscFunctionReturn(0); 960 } 961 962 #undef __FUNCT__ 963 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 964 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 965 { 966 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 967 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 968 int m = A->m,*ip,N,*ailen = a->ilen; 969 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 970 MatScalar *aa = a->a,*ap; 971 #if defined(PETSC_HAVE_DSCPACK) 972 PetscTruth flag; 973 #endif 974 975 PetscFunctionBegin; 976 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 977 978 if (m) rmax = ailen[0]; 979 for (i=1; i<mbs; i++) { 980 /* move each row back by the amount of empty slots (fshift) before it*/ 981 fshift += imax[i-1] - ailen[i-1]; 982 rmax = PetscMax(rmax,ailen[i]); 983 if (fshift) { 984 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 985 N = ailen[i]; 986 for (j=0; j<N; j++) { 987 ip[j-fshift] = ip[j]; 988 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 989 } 990 } 991 ai[i] = ai[i-1] + ailen[i-1]; 992 } 993 if (mbs) { 994 fshift += imax[mbs-1] - ailen[mbs-1]; 995 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 996 } 997 /* reset ilen and imax for each row */ 998 for (i=0; i<mbs; i++) { 999 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1000 } 1001 a->nz = ai[mbs]; 1002 1003 /* diagonals may have moved, so kill the diagonal pointers */ 1004 if (fshift && a->diag) { 1005 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1006 PetscLogObjectMemory(A,-(mbs+1)*sizeof(int)); 1007 a->diag = 0; 1008 } 1009 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2); 1010 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",a->reallocs); 1011 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 1012 a->reallocs = 0; 1013 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1014 1015 #if defined(PETSC_HAVE_DSCPACK) 1016 ierr = PetscOptionsHasName(A->prefix,"-mat_baij_dscpack",&flag);CHKERRQ(ierr); 1017 if (flag) { ierr = MatUseDSCPACK_MPIBAIJ(A);CHKERRQ(ierr); } 1018 #endif 1019 1020 PetscFunctionReturn(0); 1021 } 1022 1023 1024 1025 /* 1026 This function returns an array of flags which indicate the locations of contiguous 1027 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1028 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1029 Assume: sizes should be long enough to hold all the values. 1030 */ 1031 #undef __FUNCT__ 1032 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1033 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 1034 { 1035 int i,j,k,row; 1036 PetscTruth flg; 1037 1038 PetscFunctionBegin; 1039 for (i=0,j=0; i<n; j++) { 1040 row = idx[i]; 1041 if (row%bs!=0) { /* Not the begining of a block */ 1042 sizes[j] = 1; 1043 i++; 1044 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1045 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1046 i++; 1047 } else { /* Begining of the block, so check if the complete block exists */ 1048 flg = PETSC_TRUE; 1049 for (k=1; k<bs; k++) { 1050 if (row+k != idx[i+k]) { /* break in the block */ 1051 flg = PETSC_FALSE; 1052 break; 1053 } 1054 } 1055 if (flg == PETSC_TRUE) { /* No break in the bs */ 1056 sizes[j] = bs; 1057 i+= bs; 1058 } else { 1059 sizes[j] = 1; 1060 i++; 1061 } 1062 } 1063 } 1064 *bs_max = j; 1065 PetscFunctionReturn(0); 1066 } 1067 1068 #undef __FUNCT__ 1069 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1070 int MatZeroRows_SeqBAIJ(Mat A,IS is,PetscScalar *diag) 1071 { 1072 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1073 int ierr,i,j,k,count,is_n,*is_idx,*rows; 1074 int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max; 1075 PetscScalar zero = 0.0; 1076 MatScalar *aa; 1077 1078 PetscFunctionBegin; 1079 /* Make a copy of the IS and sort it */ 1080 ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr); 1081 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1082 1083 /* allocate memory for rows,sizes */ 1084 ierr = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr); 1085 sizes = rows + is_n; 1086 1087 /* copy IS values to rows, and sort them */ 1088 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1089 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1090 if (baij->keepzeroedrows) { 1091 for (i=0; i<is_n; i++) { sizes[i] = 1; } 1092 bs_max = is_n; 1093 } else { 1094 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 1095 } 1096 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 1097 1098 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1099 row = rows[j]; 1100 if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row); 1101 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1102 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 1103 if (sizes[i] == bs && !baij->keepzeroedrows) { 1104 if (diag) { 1105 if (baij->ilen[row/bs] > 0) { 1106 baij->ilen[row/bs] = 1; 1107 baij->j[baij->i[row/bs]] = row/bs; 1108 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 1109 } 1110 /* Now insert all the diagonal values for this bs */ 1111 for (k=0; k<bs; k++) { 1112 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 1113 } 1114 } else { /* (!diag) */ 1115 baij->ilen[row/bs] = 0; 1116 } /* end (!diag) */ 1117 } else { /* (sizes[i] != bs) */ 1118 #if defined (PETSC_USE_DEBUG) 1119 if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1"); 1120 #endif 1121 for (k=0; k<count; k++) { 1122 aa[0] = zero; 1123 aa += bs; 1124 } 1125 if (diag) { 1126 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1127 } 1128 } 1129 } 1130 1131 ierr = PetscFree(rows);CHKERRQ(ierr); 1132 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1133 PetscFunctionReturn(0); 1134 } 1135 1136 #undef __FUNCT__ 1137 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1138 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is) 1139 { 1140 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1141 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 1142 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1143 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 1144 int ridx,cidx,bs2=a->bs2,ierr; 1145 PetscTruth roworiented=a->roworiented; 1146 MatScalar *ap,value,*aa=a->a,*bap; 1147 1148 PetscFunctionBegin; 1149 for (k=0; k<m; k++) { /* loop over added rows */ 1150 row = im[k]; brow = row/bs; 1151 if (row < 0) continue; 1152 #if defined(PETSC_USE_BOPT_g) 1153 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m); 1154 #endif 1155 rp = aj + ai[brow]; 1156 ap = aa + bs2*ai[brow]; 1157 rmax = imax[brow]; 1158 nrow = ailen[brow]; 1159 low = 0; 1160 for (l=0; l<n; l++) { /* loop over added columns */ 1161 if (in[l] < 0) continue; 1162 #if defined(PETSC_USE_BOPT_g) 1163 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n); 1164 #endif 1165 col = in[l]; bcol = col/bs; 1166 ridx = row % bs; cidx = col % bs; 1167 if (roworiented) { 1168 value = v[l + k*n]; 1169 } else { 1170 value = v[k + l*m]; 1171 } 1172 if (!sorted) low = 0; high = nrow; 1173 while (high-low > 7) { 1174 t = (low+high)/2; 1175 if (rp[t] > bcol) high = t; 1176 else low = t; 1177 } 1178 for (i=low; i<high; i++) { 1179 if (rp[i] > bcol) break; 1180 if (rp[i] == bcol) { 1181 bap = ap + bs2*i + bs*cidx + ridx; 1182 if (is == ADD_VALUES) *bap += value; 1183 else *bap = value; 1184 goto noinsert1; 1185 } 1186 } 1187 if (nonew == 1) goto noinsert1; 1188 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 1189 if (nrow >= rmax) { 1190 /* there is no extra room in row, therefore enlarge */ 1191 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1192 MatScalar *new_a; 1193 1194 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 1195 1196 /* Malloc new storage space */ 1197 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1198 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1199 new_j = (int*)(new_a + bs2*new_nz); 1200 new_i = new_j + new_nz; 1201 1202 /* copy over old data into new slots */ 1203 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 1204 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1205 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 1206 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1207 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 1208 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1209 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1210 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1211 /* free up old matrix storage */ 1212 ierr = PetscFree(a->a);CHKERRQ(ierr); 1213 if (!a->singlemalloc) { 1214 ierr = PetscFree(a->i);CHKERRQ(ierr); 1215 ierr = PetscFree(a->j);CHKERRQ(ierr); 1216 } 1217 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1218 a->singlemalloc = PETSC_TRUE; 1219 1220 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1221 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1222 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 1223 a->maxnz += bs2*CHUNKSIZE; 1224 a->reallocs++; 1225 a->nz++; 1226 } 1227 N = nrow++ - 1; 1228 /* shift up all the later entries in this row */ 1229 for (ii=N; ii>=i; ii--) { 1230 rp[ii+1] = rp[ii]; 1231 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1232 } 1233 if (N>=i) { 1234 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1235 } 1236 rp[i] = bcol; 1237 ap[bs2*i + bs*cidx + ridx] = value; 1238 noinsert1:; 1239 low = i; 1240 } 1241 ailen[brow] = nrow; 1242 } 1243 PetscFunctionReturn(0); 1244 } 1245 1246 1247 #undef __FUNCT__ 1248 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1249 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info) 1250 { 1251 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1252 Mat outA; 1253 int ierr; 1254 PetscTruth row_identity,col_identity; 1255 1256 PetscFunctionBegin; 1257 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1258 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1259 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1260 if (!row_identity || !col_identity) { 1261 SETERRQ(1,"Row and column permutations must be identity for in-place ILU"); 1262 } 1263 1264 outA = inA; 1265 inA->factor = FACTOR_LU; 1266 1267 if (!a->diag) { 1268 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1269 } 1270 1271 a->row = row; 1272 a->col = col; 1273 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1274 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1275 1276 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1277 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1278 PetscLogObjectParent(inA,a->icol); 1279 1280 /* 1281 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1282 for ILU(0) factorization with natural ordering 1283 */ 1284 if (a->bs < 8) { 1285 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1286 } else { 1287 if (!a->solve_work) { 1288 ierr = PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1289 PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar)); 1290 } 1291 } 1292 1293 ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr); 1294 1295 PetscFunctionReturn(0); 1296 } 1297 #undef __FUNCT__ 1298 #define __FUNCT__ "MatPrintHelp_SeqBAIJ" 1299 int MatPrintHelp_SeqBAIJ(Mat A) 1300 { 1301 static PetscTruth called = PETSC_FALSE; 1302 MPI_Comm comm = A->comm; 1303 int ierr; 1304 1305 PetscFunctionBegin; 1306 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1307 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1308 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1309 PetscFunctionReturn(0); 1310 } 1311 1312 EXTERN_C_BEGIN 1313 #undef __FUNCT__ 1314 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1315 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices) 1316 { 1317 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1318 int i,nz,nbs; 1319 1320 PetscFunctionBegin; 1321 nz = baij->maxnz/baij->bs2; 1322 nbs = baij->nbs; 1323 for (i=0; i<nz; i++) { 1324 baij->j[i] = indices[i]; 1325 } 1326 baij->nz = nz; 1327 for (i=0; i<nbs; i++) { 1328 baij->ilen[i] = baij->imax[i]; 1329 } 1330 1331 PetscFunctionReturn(0); 1332 } 1333 EXTERN_C_END 1334 1335 #undef __FUNCT__ 1336 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1337 /*@ 1338 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1339 in the matrix. 1340 1341 Input Parameters: 1342 + mat - the SeqBAIJ matrix 1343 - indices - the column indices 1344 1345 Level: advanced 1346 1347 Notes: 1348 This can be called if you have precomputed the nonzero structure of the 1349 matrix and want to provide it to the matrix object to improve the performance 1350 of the MatSetValues() operation. 1351 1352 You MUST have set the correct numbers of nonzeros per row in the call to 1353 MatCreateSeqBAIJ(). 1354 1355 MUST be called before any calls to MatSetValues(); 1356 1357 @*/ 1358 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices) 1359 { 1360 int ierr,(*f)(Mat,int *); 1361 1362 PetscFunctionBegin; 1363 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1364 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1365 if (f) { 1366 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1367 } else { 1368 SETERRQ(1,"Wrong type of matrix to set column indices"); 1369 } 1370 PetscFunctionReturn(0); 1371 } 1372 1373 #undef __FUNCT__ 1374 #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1375 int MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1376 { 1377 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1378 int ierr,i,j,n,row,bs,*ai,*aj,mbs; 1379 PetscReal atmp; 1380 PetscScalar *x,zero = 0.0; 1381 MatScalar *aa; 1382 int ncols,brow,krow,kcol; 1383 1384 PetscFunctionBegin; 1385 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1386 bs = a->bs; 1387 aa = a->a; 1388 ai = a->i; 1389 aj = a->j; 1390 mbs = a->mbs; 1391 1392 ierr = VecSet(&zero,v);CHKERRQ(ierr); 1393 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1394 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1395 if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1396 for (i=0; i<mbs; i++) { 1397 ncols = ai[1] - ai[0]; ai++; 1398 brow = bs*i; 1399 for (j=0; j<ncols; j++){ 1400 /* bcol = bs*(*aj); */ 1401 for (kcol=0; kcol<bs; kcol++){ 1402 for (krow=0; krow<bs; krow++){ 1403 atmp = PetscAbsScalar(*aa); aa++; 1404 row = brow + krow; /* row index */ 1405 /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */ 1406 if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1407 } 1408 } 1409 aj++; 1410 } 1411 } 1412 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1413 PetscFunctionReturn(0); 1414 } 1415 1416 #undef __FUNCT__ 1417 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1418 int MatSetUpPreallocation_SeqBAIJ(Mat A) 1419 { 1420 int ierr; 1421 1422 PetscFunctionBegin; 1423 ierr = MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1424 PetscFunctionReturn(0); 1425 } 1426 1427 #undef __FUNCT__ 1428 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1429 int MatGetArray_SeqBAIJ(Mat A,PetscScalar **array) 1430 { 1431 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1432 PetscFunctionBegin; 1433 *array = a->a; 1434 PetscFunctionReturn(0); 1435 } 1436 1437 #undef __FUNCT__ 1438 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1439 int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar **array) 1440 { 1441 PetscFunctionBegin; 1442 PetscFunctionReturn(0); 1443 } 1444 1445 #include "petscblaslapack.h" 1446 #undef __FUNCT__ 1447 #define __FUNCT__ "MatAXPY_SeqBAIJ" 1448 int MatAXPY_SeqBAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str) 1449 { 1450 int ierr,one=1; 1451 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 1452 1453 PetscFunctionBegin; 1454 if (str == SAME_NONZERO_PATTERN) { 1455 BLaxpy_(&x->nz,a,x->a,&one,y->a,&one); 1456 } else { 1457 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1458 } 1459 PetscFunctionReturn(0); 1460 } 1461 1462 /* -------------------------------------------------------------------*/ 1463 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 1464 MatGetRow_SeqBAIJ, 1465 MatRestoreRow_SeqBAIJ, 1466 MatMult_SeqBAIJ_N, 1467 MatMultAdd_SeqBAIJ_N, 1468 MatMultTranspose_SeqBAIJ, 1469 MatMultTransposeAdd_SeqBAIJ, 1470 MatSolve_SeqBAIJ_N, 1471 0, 1472 0, 1473 0, 1474 MatLUFactor_SeqBAIJ, 1475 0, 1476 0, 1477 MatTranspose_SeqBAIJ, 1478 MatGetInfo_SeqBAIJ, 1479 MatEqual_SeqBAIJ, 1480 MatGetDiagonal_SeqBAIJ, 1481 MatDiagonalScale_SeqBAIJ, 1482 MatNorm_SeqBAIJ, 1483 0, 1484 MatAssemblyEnd_SeqBAIJ, 1485 0, 1486 MatSetOption_SeqBAIJ, 1487 MatZeroEntries_SeqBAIJ, 1488 MatZeroRows_SeqBAIJ, 1489 MatLUFactorSymbolic_SeqBAIJ, 1490 MatLUFactorNumeric_SeqBAIJ_N, 1491 0, 1492 0, 1493 MatSetUpPreallocation_SeqBAIJ, 1494 MatILUFactorSymbolic_SeqBAIJ, 1495 0, 1496 MatGetArray_SeqBAIJ, 1497 MatRestoreArray_SeqBAIJ, 1498 MatDuplicate_SeqBAIJ, 1499 0, 1500 0, 1501 MatILUFactor_SeqBAIJ, 1502 0, 1503 MatAXPY_SeqBAIJ, 1504 MatGetSubMatrices_SeqBAIJ, 1505 MatIncreaseOverlap_SeqBAIJ, 1506 MatGetValues_SeqBAIJ, 1507 0, 1508 MatPrintHelp_SeqBAIJ, 1509 MatScale_SeqBAIJ, 1510 0, 1511 0, 1512 0, 1513 MatGetBlockSize_SeqBAIJ, 1514 MatGetRowIJ_SeqBAIJ, 1515 MatRestoreRowIJ_SeqBAIJ, 1516 0, 1517 0, 1518 0, 1519 0, 1520 0, 1521 0, 1522 MatSetValuesBlocked_SeqBAIJ, 1523 MatGetSubMatrix_SeqBAIJ, 1524 MatDestroy_SeqBAIJ, 1525 MatView_SeqBAIJ, 1526 MatGetPetscMaps_Petsc, 1527 0, 1528 0, 1529 0, 1530 0, 1531 0, 1532 0, 1533 MatGetRowMax_SeqBAIJ, 1534 MatConvert_Basic}; 1535 1536 EXTERN_C_BEGIN 1537 #undef __FUNCT__ 1538 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 1539 int MatStoreValues_SeqBAIJ(Mat mat) 1540 { 1541 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1542 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1543 int ierr; 1544 1545 PetscFunctionBegin; 1546 if (aij->nonew != 1) { 1547 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1548 } 1549 1550 /* allocate space for values if not already there */ 1551 if (!aij->saved_values) { 1552 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1553 } 1554 1555 /* copy values over */ 1556 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1557 PetscFunctionReturn(0); 1558 } 1559 EXTERN_C_END 1560 1561 EXTERN_C_BEGIN 1562 #undef __FUNCT__ 1563 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 1564 int MatRetrieveValues_SeqBAIJ(Mat mat) 1565 { 1566 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1567 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 1568 1569 PetscFunctionBegin; 1570 if (aij->nonew != 1) { 1571 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1572 } 1573 if (!aij->saved_values) { 1574 SETERRQ(1,"Must call MatStoreValues(A);first"); 1575 } 1576 1577 /* copy values over */ 1578 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1579 PetscFunctionReturn(0); 1580 } 1581 EXTERN_C_END 1582 1583 EXTERN_C_BEGIN 1584 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*); 1585 EXTERN_C_END 1586 1587 EXTERN_C_BEGIN 1588 #undef __FUNCT__ 1589 #define __FUNCT__ "MatCreate_SeqBAIJ" 1590 int MatCreate_SeqBAIJ(Mat B) 1591 { 1592 int ierr,size; 1593 Mat_SeqBAIJ *b; 1594 1595 PetscFunctionBegin; 1596 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1597 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1598 1599 B->m = B->M = PetscMax(B->m,B->M); 1600 B->n = B->N = PetscMax(B->n,B->N); 1601 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 1602 B->data = (void*)b; 1603 ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 1604 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1605 B->factor = 0; 1606 B->lupivotthreshold = 1.0; 1607 B->mapping = 0; 1608 b->row = 0; 1609 b->col = 0; 1610 b->icol = 0; 1611 b->reallocs = 0; 1612 b->saved_values = 0; 1613 #if defined(PETSC_USE_MAT_SINGLE) 1614 b->setvalueslen = 0; 1615 b->setvaluescopy = PETSC_NULL; 1616 #endif 1617 1618 ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr); 1619 ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr); 1620 1621 b->sorted = PETSC_FALSE; 1622 b->roworiented = PETSC_TRUE; 1623 b->nonew = 0; 1624 b->diag = 0; 1625 b->solve_work = 0; 1626 b->mult_work = 0; 1627 B->spptr = 0; 1628 B->info.nz_unneeded = (PetscReal)b->maxnz; 1629 b->keepzeroedrows = PETSC_FALSE; 1630 1631 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1632 "MatStoreValues_SeqBAIJ", 1633 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 1634 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1635 "MatRetrieveValues_SeqBAIJ", 1636 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 1637 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 1638 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 1639 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 1640 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 1641 "MatConvert_SeqBAIJ_SeqAIJ", 1642 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 1643 PetscFunctionReturn(0); 1644 } 1645 EXTERN_C_END 1646 1647 #undef __FUNCT__ 1648 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 1649 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1650 { 1651 Mat C; 1652 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 1653 int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr; 1654 1655 PetscFunctionBegin; 1656 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1657 1658 *B = 0; 1659 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1660 ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr); 1661 c = (Mat_SeqBAIJ*)C->data; 1662 1663 c->bs = a->bs; 1664 c->bs2 = a->bs2; 1665 c->mbs = a->mbs; 1666 c->nbs = a->nbs; 1667 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1668 1669 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1670 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1671 for (i=0; i<mbs; i++) { 1672 c->imax[i] = a->imax[i]; 1673 c->ilen[i] = a->ilen[i]; 1674 } 1675 1676 /* allocate the matrix space */ 1677 c->singlemalloc = PETSC_TRUE; 1678 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1679 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1680 c->j = (int*)(c->a + nz*bs2); 1681 c->i = c->j + nz; 1682 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1683 if (mbs > 0) { 1684 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1685 if (cpvalues == MAT_COPY_VALUES) { 1686 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1687 } else { 1688 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1689 } 1690 } 1691 1692 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1693 c->sorted = a->sorted; 1694 c->roworiented = a->roworiented; 1695 c->nonew = a->nonew; 1696 1697 if (a->diag) { 1698 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1699 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1700 for (i=0; i<mbs; i++) { 1701 c->diag[i] = a->diag[i]; 1702 } 1703 } else c->diag = 0; 1704 c->nz = a->nz; 1705 c->maxnz = a->maxnz; 1706 c->solve_work = 0; 1707 C->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1708 c->mult_work = 0; 1709 C->preallocated = PETSC_TRUE; 1710 C->assembled = PETSC_TRUE; 1711 *B = C; 1712 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1713 PetscFunctionReturn(0); 1714 } 1715 1716 EXTERN_C_BEGIN 1717 #undef __FUNCT__ 1718 #define __FUNCT__ "MatLoad_SeqBAIJ" 1719 int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A) 1720 { 1721 Mat_SeqBAIJ *a; 1722 Mat B; 1723 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1724 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1725 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1726 int *masked,nmask,tmp,bs2,ishift; 1727 PetscScalar *aa; 1728 MPI_Comm comm = ((PetscObject)viewer)->comm; 1729 1730 PetscFunctionBegin; 1731 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1732 bs2 = bs*bs; 1733 1734 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1735 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1736 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1737 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1738 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1739 M = header[1]; N = header[2]; nz = header[3]; 1740 1741 if (header[3] < 0) { 1742 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 1743 } 1744 1745 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1746 1747 /* 1748 This code adds extra rows to make sure the number of rows is 1749 divisible by the blocksize 1750 */ 1751 mbs = M/bs; 1752 extra_rows = bs - M + bs*(mbs); 1753 if (extra_rows == bs) extra_rows = 0; 1754 else mbs++; 1755 if (extra_rows) { 1756 PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 1757 } 1758 1759 /* read in row lengths */ 1760 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1761 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1762 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1763 1764 /* read in column indices */ 1765 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1766 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1767 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1768 1769 /* loop over row lengths determining block row lengths */ 1770 ierr = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr); 1771 ierr = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1772 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1773 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1774 masked = mask + mbs; 1775 rowcount = 0; nzcount = 0; 1776 for (i=0; i<mbs; i++) { 1777 nmask = 0; 1778 for (j=0; j<bs; j++) { 1779 kmax = rowlengths[rowcount]; 1780 for (k=0; k<kmax; k++) { 1781 tmp = jj[nzcount++]/bs; 1782 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1783 } 1784 rowcount++; 1785 } 1786 browlengths[i] += nmask; 1787 /* zero out the mask elements we set */ 1788 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1789 } 1790 1791 /* create our matrix */ 1792 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr); 1793 B = *A; 1794 a = (Mat_SeqBAIJ*)B->data; 1795 1796 /* set matrix "i" values */ 1797 a->i[0] = 0; 1798 for (i=1; i<= mbs; i++) { 1799 a->i[i] = a->i[i-1] + browlengths[i-1]; 1800 a->ilen[i-1] = browlengths[i-1]; 1801 } 1802 a->nz = 0; 1803 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 1804 1805 /* read in nonzero values */ 1806 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1807 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1808 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1809 1810 /* set "a" and "j" values into matrix */ 1811 nzcount = 0; jcount = 0; 1812 for (i=0; i<mbs; i++) { 1813 nzcountb = nzcount; 1814 nmask = 0; 1815 for (j=0; j<bs; j++) { 1816 kmax = rowlengths[i*bs+j]; 1817 for (k=0; k<kmax; k++) { 1818 tmp = jj[nzcount++]/bs; 1819 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1820 } 1821 } 1822 /* sort the masked values */ 1823 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1824 1825 /* set "j" values into matrix */ 1826 maskcount = 1; 1827 for (j=0; j<nmask; j++) { 1828 a->j[jcount++] = masked[j]; 1829 mask[masked[j]] = maskcount++; 1830 } 1831 /* set "a" values into matrix */ 1832 ishift = bs2*a->i[i]; 1833 for (j=0; j<bs; j++) { 1834 kmax = rowlengths[i*bs+j]; 1835 for (k=0; k<kmax; k++) { 1836 tmp = jj[nzcountb]/bs ; 1837 block = mask[tmp] - 1; 1838 point = jj[nzcountb] - bs*tmp; 1839 idx = ishift + bs2*block + j + bs*point; 1840 a->a[idx] = (MatScalar)aa[nzcountb++]; 1841 } 1842 } 1843 /* zero out the mask elements we set */ 1844 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1845 } 1846 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1847 1848 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1849 ierr = PetscFree(browlengths);CHKERRQ(ierr); 1850 ierr = PetscFree(aa);CHKERRQ(ierr); 1851 ierr = PetscFree(jj);CHKERRQ(ierr); 1852 ierr = PetscFree(mask);CHKERRQ(ierr); 1853 1854 B->assembled = PETSC_TRUE; 1855 1856 ierr = MatView_Private(B);CHKERRQ(ierr); 1857 PetscFunctionReturn(0); 1858 } 1859 EXTERN_C_END 1860 1861 #undef __FUNCT__ 1862 #define __FUNCT__ "MatCreateSeqBAIJ" 1863 /*@C 1864 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 1865 compressed row) format. For good matrix assembly performance the 1866 user should preallocate the matrix storage by setting the parameter nz 1867 (or the array nnz). By setting these parameters accurately, performance 1868 during matrix assembly can be increased by more than a factor of 50. 1869 1870 Collective on MPI_Comm 1871 1872 Input Parameters: 1873 + comm - MPI communicator, set to PETSC_COMM_SELF 1874 . bs - size of block 1875 . m - number of rows 1876 . n - number of columns 1877 . nz - number of nonzero blocks per block row (same for all rows) 1878 - nnz - array containing the number of nonzero blocks in the various block rows 1879 (possibly different for each block row) or PETSC_NULL 1880 1881 Output Parameter: 1882 . A - the matrix 1883 1884 Options Database Keys: 1885 . -mat_no_unroll - uses code that does not unroll the loops in the 1886 block calculations (much slower) 1887 . -mat_block_size - size of the blocks to use 1888 1889 Level: intermediate 1890 1891 Notes: 1892 A nonzero block is any block that as 1 or more nonzeros in it 1893 1894 The block AIJ format is fully compatible with standard Fortran 77 1895 storage. That is, the stored row and column indices can begin at 1896 either one (as in Fortran) or zero. See the users' manual for details. 1897 1898 Specify the preallocated storage with either nz or nnz (not both). 1899 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1900 allocation. For additional details, see the users manual chapter on 1901 matrices. 1902 1903 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1904 @*/ 1905 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A) 1906 { 1907 int ierr; 1908 1909 PetscFunctionBegin; 1910 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1911 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 1912 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1913 PetscFunctionReturn(0); 1914 } 1915 1916 #undef __FUNCT__ 1917 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 1918 /*@C 1919 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 1920 per row in the matrix. For good matrix assembly performance the 1921 user should preallocate the matrix storage by setting the parameter nz 1922 (or the array nnz). By setting these parameters accurately, performance 1923 during matrix assembly can be increased by more than a factor of 50. 1924 1925 Collective on MPI_Comm 1926 1927 Input Parameters: 1928 + A - the matrix 1929 . bs - size of block 1930 . nz - number of block nonzeros per block row (same for all rows) 1931 - nnz - array containing the number of block nonzeros in the various block rows 1932 (possibly different for each block row) or PETSC_NULL 1933 1934 Options Database Keys: 1935 . -mat_no_unroll - uses code that does not unroll the loops in the 1936 block calculations (much slower) 1937 . -mat_block_size - size of the blocks to use 1938 1939 Level: intermediate 1940 1941 Notes: 1942 The block AIJ format is fully compatible with standard Fortran 77 1943 storage. That is, the stored row and column indices can begin at 1944 either one (as in Fortran) or zero. See the users' manual for details. 1945 1946 Specify the preallocated storage with either nz or nnz (not both). 1947 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1948 allocation. For additional details, see the users manual chapter on 1949 matrices. 1950 1951 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1952 @*/ 1953 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz) 1954 { 1955 Mat_SeqBAIJ *b; 1956 int i,len,ierr,mbs,nbs,bs2,newbs = bs; 1957 PetscTruth flg; 1958 1959 PetscFunctionBegin; 1960 ierr = PetscTypeCompare((PetscObject)B,MATSEQBAIJ,&flg);CHKERRQ(ierr); 1961 if (!flg) PetscFunctionReturn(0); 1962 1963 B->preallocated = PETSC_TRUE; 1964 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr); 1965 if (nnz && newbs != bs) { 1966 SETERRQ(1,"Cannot change blocksize from command line if setting nnz"); 1967 } 1968 bs = newbs; 1969 1970 mbs = B->m/bs; 1971 nbs = B->n/bs; 1972 bs2 = bs*bs; 1973 1974 if (mbs*bs!=B->m || nbs*bs!=B->n) { 1975 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs); 1976 } 1977 1978 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1979 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1980 if (nnz) { 1981 for (i=0; i<mbs; i++) { 1982 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1983 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs); 1984 } 1985 } 1986 1987 b = (Mat_SeqBAIJ*)B->data; 1988 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1989 B->ops->solve = MatSolve_SeqBAIJ_Update; 1990 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 1991 if (!flg) { 1992 switch (bs) { 1993 case 1: 1994 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1995 B->ops->mult = MatMult_SeqBAIJ_1; 1996 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 1997 break; 1998 case 2: 1999 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2000 B->ops->mult = MatMult_SeqBAIJ_2; 2001 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2002 break; 2003 case 3: 2004 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2005 B->ops->mult = MatMult_SeqBAIJ_3; 2006 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2007 break; 2008 case 4: 2009 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2010 B->ops->mult = MatMult_SeqBAIJ_4; 2011 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2012 break; 2013 case 5: 2014 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2015 B->ops->mult = MatMult_SeqBAIJ_5; 2016 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2017 break; 2018 case 6: 2019 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 2020 B->ops->mult = MatMult_SeqBAIJ_6; 2021 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2022 break; 2023 case 7: 2024 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 2025 B->ops->mult = MatMult_SeqBAIJ_7; 2026 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2027 break; 2028 default: 2029 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 2030 B->ops->mult = MatMult_SeqBAIJ_N; 2031 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2032 break; 2033 } 2034 } 2035 b->bs = bs; 2036 b->mbs = mbs; 2037 b->nbs = nbs; 2038 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 2039 if (!nnz) { 2040 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2041 else if (nz <= 0) nz = 1; 2042 for (i=0; i<mbs; i++) b->imax[i] = nz; 2043 nz = nz*mbs; 2044 } else { 2045 nz = 0; 2046 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2047 } 2048 2049 /* allocate the matrix space */ 2050 len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 2051 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 2052 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2053 b->j = (int*)(b->a + nz*bs2); 2054 ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 2055 b->i = b->j + nz; 2056 b->singlemalloc = PETSC_TRUE; 2057 2058 b->i[0] = 0; 2059 for (i=1; i<mbs+1; i++) { 2060 b->i[i] = b->i[i-1] + b->imax[i-1]; 2061 } 2062 2063 /* b->ilen will count nonzeros in each block row so far. */ 2064 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 2065 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 2066 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2067 2068 b->bs = bs; 2069 b->bs2 = bs2; 2070 b->mbs = mbs; 2071 b->nz = 0; 2072 b->maxnz = nz*bs2; 2073 B->info.nz_unneeded = (PetscReal)b->maxnz; 2074 PetscFunctionReturn(0); 2075 } 2076 2077