1 /*$Id: sbaij.c,v 1.35 2000/10/12 14:03:03 hzhang Exp hzhang $*/ 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "petscsys.h" /*I "petscmat.h" I*/ 8 #include "src/mat/impls/baij/seq/baij.h" 9 #include "src/vec/vecimpl.h" 10 #include "src/inline/spops.h" 11 #include "src/mat/impls/sbaij/seq/sbaij.h" 12 13 #define CHUNKSIZE 10 14 15 /* 16 Checks for missing diagonals 17 */ 18 #undef __FUNC__ 19 #define __FUNC__ "MatMissingDiagonal_SeqSBAIJ" 20 int MatMissingDiagonal_SeqSBAIJ(Mat A) 21 { 22 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 23 int *diag,*jj = a->j,i,ierr; 24 25 PetscFunctionBegin; 26 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 27 diag = a->diag; 28 for (i=0; i<a->mbs; i++) { 29 if (jj[diag[i]] != i) { 30 SETERRQ1(1,"Matrix is missing diagonal number %d",i); 31 } 32 } 33 PetscFunctionReturn(0); 34 } 35 36 #undef __FUNC__ 37 #define __FUNC__ "MatMarkDiagonal_SeqSBAIJ" 38 int MatMarkDiagonal_SeqSBAIJ(Mat A) 39 { 40 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 41 int i,j,*diag,m = a->mbs; 42 43 PetscFunctionBegin; 44 if (a->diag) PetscFunctionReturn(0); 45 46 diag = (int*)PetscMalloc((m+1)*sizeof(int));CHKPTRQ(diag); 47 PLogObjectMemory(A,(m+1)*sizeof(int)); 48 for (i=0; i<m; i++) { 49 diag[i] = a->i[i+1]; 50 for (j=a->i[i]; j<a->i[i+1]; j++) { 51 if (a->j[j] == i) { 52 diag[i] = j; 53 break; 54 } 55 } 56 } 57 a->diag = diag; 58 PetscFunctionReturn(0); 59 } 60 61 62 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 63 64 #undef __FUNC__ 65 #define __FUNC__ "MatGetRowIJ_SeqSBAIJ" 66 static int MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 67 { 68 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 69 70 PetscFunctionBegin; 71 if (ia) { 72 SETERRQ(1,"Function not yet written for SBAIJ format, only supports natural ordering"); 73 } 74 *nn = a->mbs; 75 PetscFunctionReturn(0); 76 } 77 78 #undef __FUNC__ 79 #define __FUNC__ "MatRestoreRowIJ_SeqSBAIJ" 80 static int MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 81 { 82 PetscFunctionBegin; 83 if (!ia) PetscFunctionReturn(0); 84 SETERRQ(1,"Function not yet written for SBAIJ format, only supports natural ordering"); 85 /* PetscFunctionReturn(0); */ 86 } 87 88 #undef __FUNC__ 89 #define __FUNC__ "MatGetBlockSize_SeqSBAIJ" 90 int MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs) 91 { 92 Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data; 93 94 PetscFunctionBegin; 95 *bs = sbaij->bs; 96 PetscFunctionReturn(0); 97 } 98 99 #undef __FUNC__ 100 #define __FUNC__ "MatDestroy_SeqSBAIJ" 101 int MatDestroy_SeqSBAIJ(Mat A) 102 { 103 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 104 int ierr; 105 106 PetscFunctionBegin; 107 if (A->mapping) { 108 ierr = ISLocalToGlobalMappingDestroy(A->mapping);CHKERRQ(ierr); 109 } 110 if (A->bmapping) { 111 ierr = ISLocalToGlobalMappingDestroy(A->bmapping);CHKERRQ(ierr); 112 } 113 if (A->rmap) { 114 ierr = MapDestroy(A->rmap);CHKERRQ(ierr); 115 } 116 if (A->cmap) { 117 ierr = MapDestroy(A->cmap);CHKERRQ(ierr); 118 } 119 #if defined(PETSC_USE_LOG) 120 PLogObjectState((PetscObject)A,"Rows=%d, s_NZ=%d",A->m,a->s_nz); 121 #endif 122 ierr = PetscFree(a->a);CHKERRQ(ierr); 123 if (!a->singlemalloc) { 124 ierr = PetscFree(a->i);CHKERRQ(ierr); 125 ierr = PetscFree(a->j);CHKERRQ(ierr); 126 } 127 if (a->row) { 128 ierr = ISDestroy(a->row);CHKERRQ(ierr); 129 } 130 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 131 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 132 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 133 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 134 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 135 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 136 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 137 138 if (a->inew){ 139 ierr = PetscFree(a->inew);CHKERRQ(ierr); 140 a->inew = 0; 141 } 142 ierr = PetscFree(a);CHKERRQ(ierr); 143 PLogObjectDestroy(A); 144 PetscHeaderDestroy(A); 145 PetscFunctionReturn(0); 146 } 147 148 #undef __FUNC__ 149 #define __FUNC__ "MatSetOption_SeqSBAIJ" 150 int MatSetOption_SeqSBAIJ(Mat A,MatOption op) 151 { 152 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 153 154 PetscFunctionBegin; 155 if (op == MAT_ROW_ORIENTED) a->roworiented = PETSC_TRUE; 156 else if (op == MAT_COLUMN_ORIENTED) a->roworiented = PETSC_FALSE; 157 else if (op == MAT_COLUMNS_SORTED) a->sorted = PETSC_TRUE; 158 else if (op == MAT_COLUMNS_UNSORTED) a->sorted = PETSC_FALSE; 159 else if (op == MAT_KEEP_ZEROED_ROWS) a->keepzeroedrows = PETSC_TRUE; 160 else if (op == MAT_NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 161 else if (op == MAT_NEW_NONZERO_LOCATION_ERR) a->nonew = -1; 162 else if (op == MAT_NEW_NONZERO_ALLOCATION_ERR) a->nonew = -2; 163 else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 164 else if (op == MAT_ROWS_SORTED || 165 op == MAT_ROWS_UNSORTED || 166 op == MAT_SYMMETRIC || 167 op == MAT_STRUCTURALLY_SYMMETRIC || 168 op == MAT_YES_NEW_DIAGONALS || 169 op == MAT_IGNORE_OFF_PROC_ENTRIES || 170 op == MAT_USE_HASH_TABLE) { 171 PLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n"); 172 } else if (op == MAT_NO_NEW_DIAGONALS) { 173 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 174 } else { 175 SETERRQ(PETSC_ERR_SUP,"unknown option"); 176 } 177 PetscFunctionReturn(0); 178 } 179 180 #undef __FUNC__ 181 #define __FUNC__ "MatGetOwnershipRange_SeqSBAIJ" 182 int MatGetOwnershipRange_SeqSBAIJ(Mat A,int *m,int *n) 183 { 184 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 185 186 PetscFunctionBegin; 187 *m = 0; 188 *n = A->m; 189 PetscFunctionReturn(0); 190 } 191 192 #undef __FUNC__ 193 #define __FUNC__ "MatGetRow_SeqSBAIJ" 194 int MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,Scalar **v) 195 { 196 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 197 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2; 198 MatScalar *aa,*aa_i; 199 Scalar *v_i; 200 201 PetscFunctionBegin; 202 bs = a->bs; 203 ai = a->i; 204 aj = a->j; 205 aa = a->a; 206 bs2 = a->bs2; 207 208 if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range"); 209 210 bn = row/bs; /* Block number */ 211 bp = row % bs; /* Block position */ 212 M = ai[bn+1] - ai[bn]; 213 *ncols = bs*M; 214 215 if (v) { 216 *v = 0; 217 if (*ncols) { 218 *v = (Scalar*)PetscMalloc((*ncols+row)*sizeof(Scalar));CHKPTRQ(*v); 219 for (i=0; i<M; i++) { /* for each block in the block row */ 220 v_i = *v + i*bs; 221 aa_i = aa + bs2*(ai[bn] + i); 222 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 223 } 224 } 225 } 226 227 if (cols) { 228 *cols = 0; 229 if (*ncols) { 230 *cols = (int*)PetscMalloc((*ncols+row)*sizeof(int));CHKPTRQ(*cols); 231 for (i=0; i<M; i++) { /* for each block in the block row */ 232 cols_i = *cols + i*bs; 233 itmp = bs*aj[ai[bn] + i]; 234 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 235 } 236 } 237 } 238 239 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 240 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 241 #ifdef column_search 242 v_i = *v + M*bs; 243 cols_i = *cols + M*bs; 244 for (i=0; i<bn; i++){ /* for each block row */ 245 M = ai[i+1] - ai[i]; 246 for (j=0; j<M; j++){ 247 itmp = aj[ai[i] + j]; /* block column value */ 248 if (itmp == bn){ 249 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 250 for (k=0; k<bs; k++) { 251 *cols_i++ = i*bs+k; 252 *v_i++ = aa_i[k]; 253 } 254 *ncols += bs; 255 break; 256 } 257 } 258 } 259 #endif 260 261 PetscFunctionReturn(0); 262 } 263 264 #undef __FUNC__ 265 #define __FUNC__ "MatRestoreRow_SeqSBAIJ" 266 int MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 267 { 268 int ierr; 269 270 PetscFunctionBegin; 271 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 272 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 273 PetscFunctionReturn(0); 274 } 275 276 #undef __FUNC__ 277 #define __FUNC__ "MatTranspose_SeqSBAIJ" 278 int MatTranspose_SeqSBAIJ(Mat A,Mat *B) 279 { 280 PetscFunctionBegin; 281 SETERRQ(1,"Matrix is symmetric. MatTranspose() should not be called"); 282 /* PetscFunctionReturn(0); */ 283 } 284 285 #undef __FUNC__ 286 #define __FUNC__ "MatView_SeqSBAIJ_Binary" 287 static int MatView_SeqSBAIJ_Binary(Mat A,Viewer viewer) 288 { 289 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 290 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 291 Scalar *aa; 292 FILE *file; 293 294 PetscFunctionBegin; 295 ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 296 col_lens = (int*)PetscMalloc((4+A->m)*sizeof(int));CHKPTRQ(col_lens); 297 col_lens[0] = MAT_COOKIE; 298 299 col_lens[1] = A->m; 300 col_lens[2] = A->m; 301 col_lens[3] = a->s_nz*bs2; 302 303 /* store lengths of each row and write (including header) to file */ 304 count = 0; 305 for (i=0; i<a->mbs; i++) { 306 for (j=0; j<bs; j++) { 307 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 308 } 309 } 310 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 311 ierr = PetscFree(col_lens);CHKERRQ(ierr); 312 313 /* store column indices (zero start index) */ 314 jj = (int*)PetscMalloc((a->s_nz+1)*bs2*sizeof(int));CHKPTRQ(jj); 315 count = 0; 316 for (i=0; i<a->mbs; i++) { 317 for (j=0; j<bs; j++) { 318 for (k=a->i[i]; k<a->i[i+1]; k++) { 319 for (l=0; l<bs; l++) { 320 jj[count++] = bs*a->j[k] + l; 321 } 322 } 323 } 324 } 325 ierr = PetscBinaryWrite(fd,jj,bs2*a->s_nz,PETSC_INT,0);CHKERRQ(ierr); 326 ierr = PetscFree(jj);CHKERRQ(ierr); 327 328 /* store nonzero values */ 329 aa = (Scalar*)PetscMalloc((a->s_nz+1)*bs2*sizeof(Scalar));CHKPTRQ(aa); 330 count = 0; 331 for (i=0; i<a->mbs; i++) { 332 for (j=0; j<bs; j++) { 333 for (k=a->i[i]; k<a->i[i+1]; k++) { 334 for (l=0; l<bs; l++) { 335 aa[count++] = a->a[bs2*k + l*bs + j]; 336 } 337 } 338 } 339 } 340 ierr = PetscBinaryWrite(fd,aa,bs2*a->s_nz,PETSC_SCALAR,0);CHKERRQ(ierr); 341 ierr = PetscFree(aa);CHKERRQ(ierr); 342 343 ierr = ViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 344 if (file) { 345 fprintf(file,"-matload_block_size %d\n",a->bs); 346 } 347 PetscFunctionReturn(0); 348 } 349 350 #undef __FUNC__ 351 #define __FUNC__ "MatView_SeqSBAIJ_ASCII" 352 static int MatView_SeqSBAIJ_ASCII(Mat A,Viewer viewer) 353 { 354 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 355 int ierr,i,j,format,bs = a->bs,k,l,bs2=a->bs2; 356 char *outputname; 357 358 PetscFunctionBegin; 359 ierr = ViewerGetOutputname(viewer,&outputname);CHKERRQ(ierr); 360 ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr); 361 if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) { 362 ierr = ViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 363 } else if (format == VIEWER_FORMAT_ASCII_MATLAB) { 364 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 365 } else if (format == VIEWER_FORMAT_ASCII_COMMON) { 366 ierr = ViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 367 for (i=0; i<a->mbs; i++) { 368 for (j=0; j<bs; j++) { 369 ierr = ViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 370 for (k=a->i[i]; k<a->i[i+1]; k++) { 371 for (l=0; l<bs; l++) { 372 #if defined(PETSC_USE_COMPLEX) 373 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 374 ierr = ViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l, 375 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 376 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 377 ierr = ViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l, 378 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 379 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 380 ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 381 } 382 #else 383 if (a->a[bs2*k + l*bs + j] != 0.0) { 384 ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 385 } 386 #endif 387 } 388 } 389 ierr = ViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 390 } 391 } 392 ierr = ViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 393 } else { 394 ierr = ViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 395 for (i=0; i<a->mbs; i++) { 396 for (j=0; j<bs; j++) { 397 ierr = ViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 398 for (k=a->i[i]; k<a->i[i+1]; k++) { 399 for (l=0; l<bs; l++) { 400 #if defined(PETSC_USE_COMPLEX) 401 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 402 ierr = ViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l, 403 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 404 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 405 ierr = ViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l, 406 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 407 } else { 408 ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 409 } 410 #else 411 ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 412 #endif 413 } 414 } 415 ierr = ViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 416 } 417 } 418 ierr = ViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 419 } 420 ierr = ViewerFlush(viewer);CHKERRQ(ierr); 421 PetscFunctionReturn(0); 422 } 423 424 #undef __FUNC__ 425 #define __FUNC__ "MatView_SeqSBAIJ_Draw_Zoom" 426 static int MatView_SeqSBAIJ_Draw_Zoom(Draw draw,void *Aa) 427 { 428 Mat A = (Mat) Aa; 429 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 430 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank; 431 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 432 MatScalar *aa; 433 MPI_Comm comm; 434 Viewer viewer; 435 436 PetscFunctionBegin; 437 /* 438 This is nasty. If this is called from an originally parallel matrix 439 then all processes call this,but only the first has the matrix so the 440 rest should return immediately. 441 */ 442 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 443 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 444 if (rank) PetscFunctionReturn(0); 445 446 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 447 448 ierr = DrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 449 DrawString(draw, .3*(xl+xr), .3*(yl+yr), DRAW_BLACK, "symmetric"); 450 451 /* loop over matrix elements drawing boxes */ 452 color = DRAW_BLUE; 453 for (i=0,row=0; i<mbs; i++,row+=bs) { 454 for (j=a->i[i]; j<a->i[i+1]; j++) { 455 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 456 x_l = a->j[j]*bs; x_r = x_l + 1.0; 457 aa = a->a + j*bs2; 458 for (k=0; k<bs; k++) { 459 for (l=0; l<bs; l++) { 460 if (PetscRealPart(*aa++) >= 0.) continue; 461 ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 462 } 463 } 464 } 465 } 466 color = DRAW_CYAN; 467 for (i=0,row=0; i<mbs; i++,row+=bs) { 468 for (j=a->i[i]; j<a->i[i+1]; j++) { 469 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 470 x_l = a->j[j]*bs; x_r = x_l + 1.0; 471 aa = a->a + j*bs2; 472 for (k=0; k<bs; k++) { 473 for (l=0; l<bs; l++) { 474 if (PetscRealPart(*aa++) != 0.) continue; 475 ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 476 } 477 } 478 } 479 } 480 481 color = DRAW_RED; 482 for (i=0,row=0; i<mbs; i++,row+=bs) { 483 for (j=a->i[i]; j<a->i[i+1]; j++) { 484 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 485 x_l = a->j[j]*bs; x_r = x_l + 1.0; 486 aa = a->a + j*bs2; 487 for (k=0; k<bs; k++) { 488 for (l=0; l<bs; l++) { 489 if (PetscRealPart(*aa++) <= 0.) continue; 490 ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 491 } 492 } 493 } 494 } 495 PetscFunctionReturn(0); 496 } 497 498 #undef __FUNC__ 499 #define __FUNC__ "MatView_SeqSBAIJ_Draw" 500 static int MatView_SeqSBAIJ_Draw(Mat A,Viewer viewer) 501 { 502 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 503 int ierr; 504 PetscReal xl,yl,xr,yr,w,h; 505 Draw draw; 506 PetscTruth isnull; 507 508 PetscFunctionBegin; 509 510 ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 511 ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 512 513 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 514 xr = A->m; yr = A->m; h = yr/10.0; w = xr/10.0; 515 xr += w; yr += h; xl = -w; yl = -h; 516 ierr = DrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 517 ierr = DrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 518 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 519 PetscFunctionReturn(0); 520 } 521 522 #undef __FUNC__ 523 #define __FUNC__ "MatView_SeqSBAIJ" 524 int MatView_SeqSBAIJ(Mat A,Viewer viewer) 525 { 526 int ierr; 527 PetscTruth issocket,isascii,isbinary,isdraw; 528 529 PetscFunctionBegin; 530 ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr); 531 ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr); 532 ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr); 533 ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr); 534 if (issocket) { 535 SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported"); 536 } else if (isascii){ 537 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 538 } else if (isbinary) { 539 ierr = MatView_SeqSBAIJ_Binary(A,viewer);CHKERRQ(ierr); 540 } else if (isdraw) { 541 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 542 } else { 543 SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name); 544 } 545 PetscFunctionReturn(0); 546 } 547 548 549 #undef __FUNC__ 550 #define __FUNC__ "MatGetValues_SeqSBAIJ" 551 int MatGetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 552 { 553 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 554 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 555 int *ai = a->i,*ailen = a->ilen; 556 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 557 MatScalar *ap,*aa = a->a,zero = 0.0; 558 559 PetscFunctionBegin; 560 for (k=0; k<m; k++) { /* loop over rows */ 561 row = im[k]; brow = row/bs; 562 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 563 if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 564 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 565 nrow = ailen[brow]; 566 for (l=0; l<n; l++) { /* loop over columns */ 567 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 568 if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 569 col = in[l] ; 570 bcol = col/bs; 571 cidx = col%bs; 572 ridx = row%bs; 573 high = nrow; 574 low = 0; /* assume unsorted */ 575 while (high-low > 5) { 576 t = (low+high)/2; 577 if (rp[t] > bcol) high = t; 578 else low = t; 579 } 580 for (i=low; i<high; i++) { 581 if (rp[i] > bcol) break; 582 if (rp[i] == bcol) { 583 *v++ = ap[bs2*i+bs*cidx+ridx]; 584 goto finished; 585 } 586 } 587 *v++ = zero; 588 finished:; 589 } 590 } 591 PetscFunctionReturn(0); 592 } 593 594 595 #undef __FUNC__ 596 #define __FUNC__ "MatSetValuesBlocked_SeqSBAIJ" 597 int MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 598 { 599 PetscFunctionBegin; 600 SETERRQ(1,"Function not yet written for SBAIJ format"); 601 /* PetscFunctionReturn(0); */ 602 } 603 604 #undef __FUNC__ 605 #define __FUNC__ "MatAssemblyEnd_SeqSBAIJ" 606 int MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 607 { 608 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 609 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 610 int m = A->m,*ip,N,*ailen = a->ilen; 611 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 612 MatScalar *aa = a->a,*ap; 613 614 PetscFunctionBegin; 615 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 616 617 if (m) rmax = ailen[0]; 618 for (i=1; i<mbs; i++) { 619 /* move each row back by the amount of empty slots (fshift) before it*/ 620 fshift += imax[i-1] - ailen[i-1]; 621 rmax = PetscMax(rmax,ailen[i]); 622 if (fshift) { 623 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 624 N = ailen[i]; 625 for (j=0; j<N; j++) { 626 ip[j-fshift] = ip[j]; 627 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 628 } 629 } 630 ai[i] = ai[i-1] + ailen[i-1]; 631 } 632 if (mbs) { 633 fshift += imax[mbs-1] - ailen[mbs-1]; 634 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 635 } 636 /* reset ilen and imax for each row */ 637 for (i=0; i<mbs; i++) { 638 ailen[i] = imax[i] = ai[i+1] - ai[i]; 639 } 640 a->s_nz = ai[mbs]; 641 642 /* diagonals may have moved, so kill the diagonal pointers */ 643 if (fshift && a->diag) { 644 ierr = PetscFree(a->diag);CHKERRQ(ierr); 645 PLogObjectMemory(A,-(m+1)*sizeof(int)); 646 a->diag = 0; 647 } 648 PLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 649 m,A->m,a->bs,fshift*bs2,a->s_nz*bs2); 650 PLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %d\n", 651 a->reallocs); 652 PLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 653 a->reallocs = 0; 654 A->info.nz_unneeded = (PetscReal)fshift*bs2; 655 656 PetscFunctionReturn(0); 657 } 658 659 /* 660 This function returns an array of flags which indicate the locations of contiguous 661 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 662 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 663 Assume: sizes should be long enough to hold all the values. 664 */ 665 #undef __FUNC__ 666 #define __FUNC__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 667 static int MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 668 { 669 int i,j,k,row; 670 PetscTruth flg; 671 672 PetscFunctionBegin; 673 for (i=0,j=0; i<n; j++) { 674 row = idx[i]; 675 if (row%bs!=0) { /* Not the begining of a block */ 676 sizes[j] = 1; 677 i++; 678 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 679 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 680 i++; 681 } else { /* Begining of the block, so check if the complete block exists */ 682 flg = PETSC_TRUE; 683 for (k=1; k<bs; k++) { 684 if (row+k != idx[i+k]) { /* break in the block */ 685 flg = PETSC_FALSE; 686 break; 687 } 688 } 689 if (flg == PETSC_TRUE) { /* No break in the bs */ 690 sizes[j] = bs; 691 i+= bs; 692 } else { 693 sizes[j] = 1; 694 i++; 695 } 696 } 697 } 698 *bs_max = j; 699 PetscFunctionReturn(0); 700 } 701 702 #undef __FUNC__ 703 #define __FUNC__ "MatZeroRows_SeqSBAIJ" 704 int MatZeroRows_SeqSBAIJ(Mat A,IS is,Scalar *diag) 705 { 706 Mat_SeqSBAIJ *sbaij=(Mat_SeqSBAIJ*)A->data; 707 int ierr,i,j,k,count,is_n,*is_idx,*rows; 708 int bs=sbaij->bs,bs2=sbaij->bs2,*sizes,row,bs_max; 709 Scalar zero = 0.0; 710 MatScalar *aa; 711 712 PetscFunctionBegin; 713 /* Make a copy of the IS and sort it */ 714 ierr = ISGetSize(is,&is_n);CHKERRQ(ierr); 715 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 716 717 /* allocate memory for rows,sizes */ 718 rows = (int*)PetscMalloc((3*is_n+1)*sizeof(int));CHKPTRQ(rows); 719 sizes = rows + is_n; 720 721 /* initialize copy IS values to rows, and sort them */ 722 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 723 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 724 if (sbaij->keepzeroedrows) { /* do not change nonzero structure */ 725 for (i=0; i<is_n; i++) { sizes[i] = 1; } /* sizes: size of blocks, = 1 or bs */ 726 bs_max = is_n; /* bs_max: num. of contiguous block row in the row */ 727 } else { 728 ierr = MatZeroRows_SeqSBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 729 } 730 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 731 732 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 733 row = rows[j]; /* row to be zeroed */ 734 if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row); 735 count = (sbaij->i[row/bs +1] - sbaij->i[row/bs])*bs; /* num. of elements in the row */ 736 aa = sbaij->a + sbaij->i[row/bs]*bs2 + (row%bs); 737 if (sizes[i] == bs && !sbaij->keepzeroedrows) { 738 if (diag) { 739 if (sbaij->ilen[row/bs] > 0) { 740 sbaij->ilen[row/bs] = 1; 741 sbaij->j[sbaij->i[row/bs]] = row/bs; 742 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 743 } 744 /* Now insert all the diagoanl values for this bs */ 745 for (k=0; k<bs; k++) { 746 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 747 } 748 } else { /* (!diag) */ 749 sbaij->ilen[row/bs] = 0; 750 } /* end (!diag) */ 751 } else { /* (sizes[i] != bs), broken block */ 752 #if defined (PETSC_USE_DEBUG) 753 if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1"); 754 #endif 755 for (k=0; k<count; k++) { 756 aa[0] = zero; 757 aa+=bs; 758 } 759 if (diag) { 760 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 761 } 762 } 763 } 764 765 ierr = PetscFree(rows);CHKERRQ(ierr); 766 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 767 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 768 PetscFunctionReturn(0); 769 } 770 771 /* Only add/insert a(i,j) with i<=j (blocks). 772 Any a(i,j) with i>j input by user is ingored. 773 */ 774 775 #undef __FUNC__ 776 #define __FUNC__ "MatSetValues_SeqSBAIJ" 777 int MatSetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 778 { 779 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 780 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 781 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 782 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 783 int ridx,cidx,bs2=a->bs2,ierr; 784 MatScalar *ap,value,*aa=a->a,*bap; 785 786 PetscFunctionBegin; 787 788 for (k=0; k<m; k++) { /* loop over added rows */ 789 row = im[k]; /* row number */ 790 brow = row/bs; /* block row number */ 791 if (row < 0) continue; 792 #if defined(PETSC_USE_BOPT_g) 793 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m); 794 #endif 795 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 796 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 797 rmax = imax[brow]; /* maximum space allocated for this row */ 798 nrow = ailen[brow]; /* actual length of this row */ 799 low = 0; 800 801 for (l=0; l<n; l++) { /* loop over added columns */ 802 if (in[l] < 0) continue; 803 #if defined(PETSC_USE_BOPT_g) 804 if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m); 805 #endif 806 col = in[l]; 807 bcol = col/bs; /* block col number */ 808 809 if (brow <= bcol){ 810 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 811 /* if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ */ 812 /* element value a(k,l) */ 813 if (roworiented) { 814 value = v[l + k*n]; 815 } else { 816 value = v[k + l*m]; 817 } 818 819 /* move pointer bap to a(k,l) quickly and add/insert value */ 820 if (!sorted) low = 0; high = nrow; 821 while (high-low > 7) { 822 t = (low+high)/2; 823 if (rp[t] > bcol) high = t; 824 else low = t; 825 } 826 for (i=low; i<high; i++) { 827 /* printf("The loop of i=low.., rp[%d]=%d\n",i,rp[i]); */ 828 if (rp[i] > bcol) break; 829 if (rp[i] == bcol) { 830 bap = ap + bs2*i + bs*cidx + ridx; 831 if (is == ADD_VALUES) *bap += value; 832 else *bap = value; 833 goto noinsert1; 834 } 835 } 836 837 if (nonew == 1) goto noinsert1; 838 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 839 if (nrow >= rmax) { 840 /* there is no extra room in row, therefore enlarge */ 841 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 842 MatScalar *new_a; 843 844 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 845 846 /* Malloc new storage space */ 847 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 848 new_a = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); 849 new_j = (int*)(new_a + bs2*new_nz); 850 new_i = new_j + new_nz; 851 852 /* copy over old data into new slots */ 853 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 854 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 855 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 856 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 857 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 858 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 859 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 860 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 861 /* free up old matrix storage */ 862 ierr = PetscFree(a->a);CHKERRQ(ierr); 863 if (!a->singlemalloc) { 864 ierr = PetscFree(a->i);CHKERRQ(ierr); 865 ierr = PetscFree(a->j);CHKERRQ(ierr); 866 } 867 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 868 a->singlemalloc = PETSC_TRUE; 869 870 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 871 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 872 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 873 a->s_maxnz += bs2*CHUNKSIZE; 874 a->reallocs++; 875 a->s_nz++; 876 } 877 878 N = nrow++ - 1; 879 /* shift up all the later entries in this row */ 880 for (ii=N; ii>=i; ii--) { 881 rp[ii+1] = rp[ii]; 882 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 883 } 884 if (N>=i) { 885 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 886 } 887 rp[i] = bcol; 888 ap[bs2*i + bs*cidx + ridx] = value; 889 noinsert1:; 890 low = i; 891 /* } */ 892 } /* end of if .. if.. */ 893 } /* end of loop over added columns */ 894 ailen[brow] = nrow; 895 } /* end of loop over added rows */ 896 897 PetscFunctionReturn(0); 898 } 899 900 extern int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,PetscReal,Mat*); 901 extern int MatCholeskyFactor_SeqSBAIJ(Mat,IS,PetscReal); 902 extern int MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS*,int); 903 extern int MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*); 904 extern int MatGetSubMatrices_SeqSBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**); 905 extern int MatMultTranspose_SeqSBAIJ(Mat,Vec,Vec); 906 extern int MatMultTransposeAdd_SeqSBAIJ(Mat,Vec,Vec,Vec); 907 extern int MatScale_SeqSBAIJ(Scalar*,Mat); 908 extern int MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *); 909 extern int MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*); 910 extern int MatGetDiagonal_SeqSBAIJ(Mat,Vec); 911 extern int MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec); 912 extern int MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *); 913 extern int MatZeroEntries_SeqSBAIJ(Mat); 914 extern int MatGetRowMax_SeqSBAIJ(Mat,Vec); 915 916 extern int MatSolve_SeqSBAIJ_N(Mat,Vec,Vec); 917 extern int MatSolve_SeqSBAIJ_1(Mat,Vec,Vec); 918 extern int MatSolve_SeqSBAIJ_2(Mat,Vec,Vec); 919 extern int MatSolve_SeqSBAIJ_3(Mat,Vec,Vec); 920 extern int MatSolve_SeqSBAIJ_4(Mat,Vec,Vec); 921 extern int MatSolve_SeqSBAIJ_5(Mat,Vec,Vec); 922 extern int MatSolve_SeqSBAIJ_6(Mat,Vec,Vec); 923 extern int MatSolve_SeqSBAIJ_7(Mat,Vec,Vec); 924 extern int MatSolveTranspose_SeqSBAIJ_7(Mat,Vec,Vec); 925 extern int MatSolveTranspose_SeqSBAIJ_6(Mat,Vec,Vec); 926 extern int MatSolveTranspose_SeqSBAIJ_5(Mat,Vec,Vec); 927 extern int MatSolveTranspose_SeqSBAIJ_4(Mat,Vec,Vec); 928 extern int MatSolveTranspose_SeqSBAIJ_3(Mat,Vec,Vec); 929 extern int MatSolveTranspose_SeqSBAIJ_2(Mat,Vec,Vec); 930 extern int MatSolveTranspose_SeqSBAIJ_1(Mat,Vec,Vec); 931 932 extern int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*); 933 extern int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*); 934 extern int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*); 935 extern int MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*); 936 extern int MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*); 937 extern int MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*); 938 extern int MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*); 939 940 extern int MatMult_SeqSBAIJ_1(Mat,Vec,Vec); 941 extern int MatMult_SeqSBAIJ_2(Mat,Vec,Vec); 942 extern int MatMult_SeqSBAIJ_3(Mat,Vec,Vec); 943 extern int MatMult_SeqSBAIJ_4(Mat,Vec,Vec); 944 extern int MatMult_SeqSBAIJ_5(Mat,Vec,Vec); 945 extern int MatMult_SeqSBAIJ_6(Mat,Vec,Vec); 946 extern int MatMult_SeqSBAIJ_7(Mat,Vec,Vec); 947 extern int MatMult_SeqSBAIJ_N(Mat,Vec,Vec); 948 949 extern int MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec); 950 extern int MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec); 951 extern int MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec); 952 extern int MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec); 953 extern int MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec); 954 extern int MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec); 955 extern int MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec); 956 extern int MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec); 957 958 #ifdef HAVE_MatIncompleteCholeskyFactor 959 /* modefied from MatILUFactor_SeqSBAIJ, needs further work! */ 960 #undef __FUNC__ 961 #define __FUNC__ "MatIncompleteCholeskyFactor_SeqSBAIJ" 962 int MatIncompleteCholeskyFactor_SeqSBAIJ(Mat inA,IS row,PetscReal fill,int level) 963 { 964 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 965 Mat outA; 966 int ierr; 967 PetscTruth row_identity,col_identity; 968 969 PetscFunctionBegin; 970 /* 971 if (level != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 972 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 973 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 974 if (!row_identity || !col_identity) { 975 SETERRQ(1,"Row and column permutations must be identity for in-place ICC"); 976 } 977 */ 978 979 outA = inA; 980 inA->factor = FACTOR_LU; 981 982 if (!a->diag) { 983 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 984 } 985 /* 986 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 987 for ILU(0) factorization with natural ordering 988 */ 989 switch (a->bs) { 990 case 1: 991 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; /* 2? */ 992 PLogInfo(inA,"MatIncompleteCholeskyFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n"); 993 case 2: 994 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 995 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 996 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 997 PLogInfo(inA,"MatIncompleteCholeskyFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 998 break; 999 case 3: 1000 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 1001 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1002 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1003 PLogInfo(inA,"MatIncompleteCholeskyFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n"); 1004 break; 1005 case 4: 1006 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 1007 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1008 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1009 PLogInfo(inA,"MatIncompleteCholeskyFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1010 break; 1011 case 5: 1012 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 1013 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1014 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1015 PLogInfo(inA,"MatIncompleteCholeskyFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1016 break; 1017 case 6: 1018 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 1019 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1020 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1021 PLogInfo(inA,"MatIncompleteCholeskyFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1022 break; 1023 case 7: 1024 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 1025 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1026 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1027 PLogInfo(inA,"MatIncompleteCholeskyFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1028 break; 1029 default: 1030 a->row = row; 1031 a->icol = col; 1032 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1033 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1034 1035 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1036 ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr); 1037 PLogObjectParent(inA,a->icol); 1038 1039 if (!a->solve_work) { 1040 a->solve_work = (Scalar*)PetscMalloc((A->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 1041 PLogObjectMemory(inA,(A->m+a->bs)*sizeof(Scalar)); 1042 } 1043 } 1044 1045 ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr); 1046 1047 PetscFunctionReturn(0); 1048 } 1049 #endif 1050 1051 #undef __FUNC__ 1052 #define __FUNC__ "MatPrintHelp_SeqSBAIJ" 1053 int MatPrintHelp_SeqSBAIJ(Mat A) 1054 { 1055 static PetscTruth called = PETSC_FALSE; 1056 MPI_Comm comm = A->comm; 1057 int ierr; 1058 1059 PetscFunctionBegin; 1060 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1061 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1062 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1063 PetscFunctionReturn(0); 1064 } 1065 1066 EXTERN_C_BEGIN 1067 #undef __FUNC__ 1068 #define __FUNC__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1069 int MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices) 1070 { 1071 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1072 int i,nz,n; 1073 1074 PetscFunctionBegin; 1075 nz = baij->s_maxnz; 1076 n = mat->n; 1077 for (i=0; i<nz; i++) { 1078 baij->j[i] = indices[i]; 1079 } 1080 baij->s_nz = nz; 1081 for (i=0; i<n; i++) { 1082 baij->ilen[i] = baij->imax[i]; 1083 } 1084 1085 PetscFunctionReturn(0); 1086 } 1087 EXTERN_C_END 1088 1089 #undef __FUNC__ 1090 #define __FUNC__ "MatSeqSBAIJSetColumnIndices" 1091 /*@ 1092 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1093 in the matrix. 1094 1095 Input Parameters: 1096 + mat - the SeqSBAIJ matrix 1097 - indices - the column indices 1098 1099 Level: advanced 1100 1101 Notes: 1102 This can be called if you have precomputed the nonzero structure of the 1103 matrix and want to provide it to the matrix object to improve the performance 1104 of the MatSetValues() operation. 1105 1106 You MUST have set the correct numbers of nonzeros per row in the call to 1107 MatCreateSeqSBAIJ(). 1108 1109 MUST be called before any calls to MatSetValues() 1110 1111 .seealso: MatCreateSeqSBAIJ 1112 @*/ 1113 int MatSeqSBAIJSetColumnIndices(Mat mat,int *indices) 1114 { 1115 int ierr,(*f)(Mat,int *); 1116 1117 PetscFunctionBegin; 1118 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1119 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void **)&f);CHKERRQ(ierr); 1120 if (f) { 1121 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1122 } else { 1123 SETERRQ(1,"Wrong type of matrix to set column indices"); 1124 } 1125 PetscFunctionReturn(0); 1126 } 1127 1128 /* -------------------------------------------------------------------*/ 1129 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1130 MatGetRow_SeqSBAIJ, 1131 MatRestoreRow_SeqSBAIJ, 1132 MatMult_SeqSBAIJ_N, 1133 MatMultAdd_SeqSBAIJ_N, 1134 MatMultTranspose_SeqSBAIJ, 1135 MatMultTransposeAdd_SeqSBAIJ, 1136 MatSolve_SeqSBAIJ_N, 1137 0, 1138 0, 1139 0, 1140 0, 1141 MatCholeskyFactor_SeqSBAIJ, 1142 0, 1143 MatTranspose_SeqSBAIJ, 1144 MatGetInfo_SeqSBAIJ, 1145 MatEqual_SeqSBAIJ, 1146 MatGetDiagonal_SeqSBAIJ, 1147 MatDiagonalScale_SeqSBAIJ, 1148 MatNorm_SeqSBAIJ, 1149 0, 1150 MatAssemblyEnd_SeqSBAIJ, 1151 0, 1152 MatSetOption_SeqSBAIJ, 1153 MatZeroEntries_SeqSBAIJ, 1154 MatZeroRows_SeqSBAIJ, 1155 0, 1156 0, 1157 MatCholeskyFactorSymbolic_SeqSBAIJ, 1158 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1159 0, 1160 0, 1161 MatGetOwnershipRange_SeqSBAIJ, 1162 0, 1163 MatIncompleteCholeskyFactorSymbolic_SeqSBAIJ, 1164 0, 1165 0, 1166 MatDuplicate_SeqSBAIJ, 1167 0, 1168 0, 1169 0, 1170 0, 1171 0, 1172 MatGetSubMatrices_SeqSBAIJ, 1173 MatIncreaseOverlap_SeqSBAIJ, 1174 MatGetValues_SeqSBAIJ, 1175 0, 1176 MatPrintHelp_SeqSBAIJ, 1177 MatScale_SeqSBAIJ, 1178 0, 1179 0, 1180 0, 1181 MatGetBlockSize_SeqSBAIJ, 1182 MatGetRowIJ_SeqSBAIJ, 1183 MatRestoreRowIJ_SeqSBAIJ, 1184 0, 1185 0, 1186 0, 1187 0, 1188 0, 1189 0, 1190 MatSetValuesBlocked_SeqSBAIJ, 1191 MatGetSubMatrix_SeqSBAIJ, 1192 0, 1193 0, 1194 MatGetMaps_Petsc, 1195 0, 1196 0, 1197 0, 1198 0, 1199 0, 1200 0, 1201 MatGetRowMax_SeqSBAIJ}; 1202 1203 EXTERN_C_BEGIN 1204 #undef __FUNC__ 1205 #define __FUNC__ "MatStoreValues_SeqSBAIJ" 1206 int MatStoreValues_SeqSBAIJ(Mat mat) 1207 { 1208 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1209 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1210 int ierr; 1211 1212 PetscFunctionBegin; 1213 if (aij->nonew != 1) { 1214 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1215 } 1216 1217 /* allocate space for values if not already there */ 1218 if (!aij->saved_values) { 1219 aij->saved_values = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(aij->saved_values); 1220 } 1221 1222 /* copy values over */ 1223 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(Scalar));CHKERRQ(ierr); 1224 PetscFunctionReturn(0); 1225 } 1226 EXTERN_C_END 1227 1228 EXTERN_C_BEGIN 1229 #undef __FUNC__ 1230 #define __FUNC__ "MatRetrieveValues_SeqSBAIJ" 1231 int MatRetrieveValues_SeqSBAIJ(Mat mat) 1232 { 1233 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1234 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 1235 1236 PetscFunctionBegin; 1237 if (aij->nonew != 1) { 1238 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1239 } 1240 if (!aij->saved_values) { 1241 SETERRQ(1,"Must call MatStoreValues(A);first"); 1242 } 1243 1244 /* copy values over */ 1245 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(Scalar));CHKERRQ(ierr); 1246 PetscFunctionReturn(0); 1247 } 1248 EXTERN_C_END 1249 1250 #undef __FUNC__ 1251 #define __FUNC__ "MatCreate_SeqSBAIJ" 1252 int MatCreate_SeqSBAIJ(Mat B) 1253 { 1254 Mat_SeqSBAIJ *b; 1255 int i,ierr,size; 1256 PetscTruth flg; 1257 int s_nz; 1258 1259 PetscFunctionBegin; 1260 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1261 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1262 1263 B->data = (void*)(b = PetscNew(Mat_SeqSBAIJ));CHKPTRQ(b); 1264 ierr = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr); 1265 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1266 B->ops->destroy = MatDestroy_SeqSBAIJ; 1267 B->ops->view = MatView_SeqSBAIJ; 1268 B->factor = 0; 1269 B->lupivotthreshold = 1.0; 1270 B->mapping = 0; 1271 b->row = 0; 1272 b->icol = 0; 1273 b->reallocs = 0; 1274 b->saved_values = 0; 1275 1276 ierr = MapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1277 ierr = MapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1278 1279 b->sorted = PETSC_FALSE; 1280 b->roworiented = PETSC_TRUE; 1281 b->nonew = 0; 1282 b->diag = 0; 1283 b->solve_work = 0; 1284 b->mult_work = 0; 1285 b->spptr = 0; 1286 b->keepzeroedrows = PETSC_FALSE; 1287 1288 b->inew = 0; 1289 b->jnew = 0; 1290 b->anew = 0; 1291 b->a2anew = 0; 1292 b->permute = PETSC_FALSE; 1293 1294 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1295 "MatStoreValues_SeqSBAIJ", 1296 (void*)MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1297 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1298 "MatRetrieveValues_SeqSBAIJ", 1299 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1300 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1301 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1302 (void*)MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1303 PetscFunctionReturn(0); 1304 } 1305 1306 #undef __FUNC__ 1307 #define __FUNC__ "MatSeqSBAIJSetNonzeroStructure" 1308 /*@C 1309 MatSeqSBAIJSetNonzeroStructure - Creates a sparse symmetric matrix in block AIJ (block 1310 compressed row) format. For good matrix assembly performance the 1311 user should preallocate the matrix storage by setting the parameter nz 1312 (or the array nnz). By setting these parameters accurately, performance 1313 during matrix assembly can be increased by more than a factor of 50. 1314 1315 Collective on Mat 1316 1317 Input Parameters: 1318 + A - the symmetric matrix 1319 . bs - size of block 1320 . nz - number of block nonzeros per block row (same for all rows) 1321 - nnz - array containing the number of block nonzeros in the various block rows 1322 (possibly different for each block row) or PETSC_NULL 1323 1324 Options Database Keys: 1325 . -mat_no_unroll - uses code that does not unroll the loops in the 1326 block calculations (much slower) 1327 . -mat_block_size - size of the blocks to use 1328 1329 Level: intermediate 1330 1331 Notes: 1332 The block AIJ format is compatible with standard Fortran 77 1333 storage. That is, the stored row and column indices can begin at 1334 either one (as in Fortran) or zero. See the users' manual for details. 1335 1336 Specify the preallocated storage with either nz or nnz (not both). 1337 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1338 allocation. For additional details, see the users manual chapter on 1339 matrices. 1340 1341 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1342 @*/ 1343 int MatSeqSBAIJSetNonzeroStructure(Mat B,int bs,int nz,int *nnz) 1344 { 1345 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1346 int i,len,ierr,mbs,bs2,size; 1347 PetscTruth flg; 1348 int s_nz; 1349 1350 PetscFunctionBegin; 1351 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1352 mbs = B->m/bs; 1353 bs2 = bs*bs; 1354 1355 if (mbs*bs != B->m) { 1356 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1357 } 1358 1359 if (nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than -2: value %d",nz); 1360 if (nnz) { 1361 for (i=0; i<mbs; i++) { 1362 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1363 } 1364 } 1365 1366 ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1367 if (!flg) { 1368 switch (bs) { 1369 case 1: 1370 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1371 B->ops->solve = MatSolve_SeqSBAIJ_1; 1372 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_1; 1373 B->ops->mult = MatMult_SeqSBAIJ_1; 1374 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1375 break; 1376 case 2: 1377 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1378 B->ops->solve = MatSolve_SeqSBAIJ_2; 1379 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_2; 1380 B->ops->mult = MatMult_SeqSBAIJ_2; 1381 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1382 break; 1383 case 3: 1384 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1385 B->ops->solve = MatSolve_SeqSBAIJ_3; 1386 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_3; 1387 B->ops->mult = MatMult_SeqSBAIJ_3; 1388 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1389 break; 1390 case 4: 1391 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1392 B->ops->solve = MatSolve_SeqSBAIJ_4; 1393 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_4; 1394 B->ops->mult = MatMult_SeqSBAIJ_4; 1395 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1396 break; 1397 case 5: 1398 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1399 B->ops->solve = MatSolve_SeqSBAIJ_5; 1400 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_5; 1401 B->ops->mult = MatMult_SeqSBAIJ_5; 1402 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1403 break; 1404 case 6: 1405 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1406 B->ops->solve = MatSolve_SeqSBAIJ_6; 1407 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_6; 1408 B->ops->mult = MatMult_SeqSBAIJ_6; 1409 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1410 break; 1411 case 7: 1412 B->ops->mult = MatMult_SeqSBAIJ_7; 1413 B->ops->solve = MatSolve_SeqSBAIJ_7; 1414 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_7; 1415 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1416 break; 1417 } 1418 } 1419 1420 b->mbs = mbs; 1421 b->nbs = mbs; 1422 b->imax = (int*)PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(b->imax); 1423 if (!nnz) { 1424 if (nz == PETSC_DEFAULT) nz = 5; 1425 else if (nz <= 0) nz = 1; 1426 for (i=0; i<mbs; i++) { 1427 b->imax[i] = nz; 1428 } 1429 nz = nz*mbs; /* total nz */ 1430 } else { 1431 nz = 0; 1432 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1433 } 1434 /* s_nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1435 s_nz = nz; 1436 1437 /* allocate the matrix space */ 1438 len = s_nz*sizeof(int) + s_nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 1439 b->a = (MatScalar*)PetscMalloc(len);CHKPTRQ(b->a); 1440 ierr = PetscMemzero(b->a,s_nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1441 b->j = (int*)(b->a + s_nz*bs2); 1442 ierr = PetscMemzero(b->j,s_nz*sizeof(int));CHKERRQ(ierr); 1443 b->i = b->j + s_nz; 1444 b->singlemalloc = PETSC_TRUE; 1445 1446 /* pointer to beginning of each row */ 1447 b->i[0] = 0; 1448 for (i=1; i<mbs+1; i++) { 1449 b->i[i] = b->i[i-1] + b->imax[i-1]; 1450 } 1451 1452 /* b->ilen will count nonzeros in each block row so far. */ 1453 b->ilen = (int*)PetscMalloc((mbs+1)*sizeof(int)); 1454 PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1455 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1456 1457 b->bs = bs; 1458 b->bs2 = bs2; 1459 b->s_nz = 0; 1460 b->s_maxnz = s_nz*bs2; 1461 1462 b->inew = 0; 1463 b->jnew = 0; 1464 b->anew = 0; 1465 b->a2anew = 0; 1466 b->permute = PETSC_FALSE; 1467 PetscFunctionReturn(0); 1468 } 1469 1470 1471 #undef __FUNC__ 1472 #define __FUNC__ "MatCreateSeqSBAIJ" 1473 /*@C 1474 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1475 compressed row) format. For good matrix assembly performance the 1476 user should preallocate the matrix storage by setting the parameter nz 1477 (or the array nnz). By setting these parameters accurately, performance 1478 during matrix assembly can be increased by more than a factor of 50. 1479 1480 Collective on MPI_Comm 1481 1482 Input Parameters: 1483 + comm - MPI communicator, set to PETSC_COMM_SELF 1484 . bs - size of block 1485 . m - number of rows, or number of columns 1486 . nz - number of block nonzeros per block row (same for all rows) 1487 - nnz - array containing the number of block nonzeros in the various block rows 1488 (possibly different for each block row) or PETSC_NULL 1489 1490 Output Parameter: 1491 . A - the symmetric matrix 1492 1493 Options Database Keys: 1494 . -mat_no_unroll - uses code that does not unroll the loops in the 1495 block calculations (much slower) 1496 . -mat_block_size - size of the blocks to use 1497 1498 Level: intermediate 1499 1500 Notes: 1501 The block AIJ format is compatible with standard Fortran 77 1502 storage. That is, the stored row and column indices can begin at 1503 either one (as in Fortran) or zero. See the users' manual for details. 1504 1505 Specify the preallocated storage with either nz or nnz (not both). 1506 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1507 allocation. For additional details, see the users manual chapter on 1508 matrices. 1509 1510 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1511 @*/ 1512 int MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A) 1513 { 1514 int ierr; 1515 1516 PetscFunctionBegin; 1517 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1518 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1519 ierr = MatSeqSBAIJSetNonzeroStructure(*A,bs,nz,nnz);CHKERRQ(ierr); 1520 PetscFunctionReturn(0); 1521 } 1522 1523 #undef __FUNC__ 1524 #define __FUNC__ "MatDuplicate_SeqSBAIJ" 1525 int MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1526 { 1527 Mat C; 1528 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1529 int i,len,mbs = a->mbs,nz = a->s_nz,bs2 =a->bs2,ierr; 1530 1531 PetscFunctionBegin; 1532 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1533 1534 *B = 0; 1535 ierr = MatCreate(A->comm,A->m,A->n,A->M,A->N,&C);CHKERRQ(ierr); 1536 ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr); 1537 C->data = (void*)(c = PetscNew(Mat_SeqSBAIJ));CHKPTRQ(c); 1538 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1539 C->ops->destroy = MatDestroy_SeqSBAIJ; 1540 C->ops->view = MatView_SeqSBAIJ; 1541 C->factor = A->factor; 1542 c->row = 0; 1543 c->icol = 0; 1544 c->saved_values = 0; 1545 c->keepzeroedrows = a->keepzeroedrows; 1546 C->assembled = PETSC_TRUE; 1547 1548 c->bs = a->bs; 1549 c->bs2 = a->bs2; 1550 c->mbs = a->mbs; 1551 c->nbs = a->nbs; 1552 1553 c->imax = (int*)PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->imax); 1554 c->ilen = (int*)PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->ilen); 1555 for (i=0; i<mbs; i++) { 1556 c->imax[i] = a->imax[i]; 1557 c->ilen[i] = a->ilen[i]; 1558 } 1559 1560 /* allocate the matrix space */ 1561 c->singlemalloc = PETSC_TRUE; 1562 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1563 c->a = (MatScalar*)PetscMalloc(len);CHKPTRQ(c->a); 1564 c->j = (int*)(c->a + nz*bs2); 1565 c->i = c->j + nz; 1566 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1567 if (mbs > 0) { 1568 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1569 if (cpvalues == MAT_COPY_VALUES) { 1570 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1571 } else { 1572 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1573 } 1574 } 1575 1576 PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1577 c->sorted = a->sorted; 1578 c->roworiented = a->roworiented; 1579 c->nonew = a->nonew; 1580 1581 if (a->diag) { 1582 c->diag = (int*)PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->diag); 1583 PLogObjectMemory(C,(mbs+1)*sizeof(int)); 1584 for (i=0; i<mbs; i++) { 1585 c->diag[i] = a->diag[i]; 1586 } 1587 } else c->diag = 0; 1588 c->s_nz = a->s_nz; 1589 c->s_maxnz = a->s_maxnz; 1590 c->solve_work = 0; 1591 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1592 c->mult_work = 0; 1593 *B = C; 1594 ierr = FListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1595 PetscFunctionReturn(0); 1596 } 1597 1598 #undef __FUNC__ 1599 #define __FUNC__ "MatLoad_SeqSBAIJ" 1600 int MatLoad_SeqSBAIJ(Viewer viewer,MatType type,Mat *A) 1601 { 1602 Mat_SeqSBAIJ *a; 1603 Mat B; 1604 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1605 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,*s_browlengths,maskcount; 1606 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1607 int *masked,nmask,tmp,bs2,ishift; 1608 Scalar *aa; 1609 MPI_Comm comm = ((PetscObject)viewer)->comm; 1610 1611 PetscFunctionBegin; 1612 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1613 bs2 = bs*bs; 1614 1615 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1616 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1617 ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1618 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1619 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1620 M = header[1]; N = header[2]; nz = header[3]; 1621 1622 if (header[3] < 0) { 1623 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1624 } 1625 1626 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1627 1628 /* 1629 This code adds extra rows to make sure the number of rows is 1630 divisible by the blocksize 1631 */ 1632 mbs = M/bs; 1633 extra_rows = bs - M + bs*(mbs); 1634 if (extra_rows == bs) extra_rows = 0; 1635 else mbs++; 1636 if (extra_rows) { 1637 PLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n"); 1638 } 1639 1640 /* read in row lengths */ 1641 rowlengths = (int*)PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 1642 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1643 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1644 1645 /* read in column indices */ 1646 jj = (int*)PetscMalloc((nz+extra_rows)*sizeof(int));CHKPTRQ(jj); 1647 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1648 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1649 1650 /* loop over row lengths determining block row lengths */ 1651 browlengths = (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 1652 s_browlengths = (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(s_browlengths); 1653 ierr = PetscMemzero(s_browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1654 mask = (int*)PetscMalloc(2*mbs*sizeof(int));CHKPTRQ(mask); 1655 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1656 masked = mask + mbs; 1657 rowcount = 0; nzcount = 0; 1658 for (i=0; i<mbs; i++) { 1659 nmask = 0; 1660 for (j=0; j<bs; j++) { 1661 kmax = rowlengths[rowcount]; 1662 for (k=0; k<kmax; k++) { 1663 tmp = jj[nzcount++]/bs; /* block col. index */ 1664 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1665 } 1666 rowcount++; 1667 } 1668 s_browlengths[i] += nmask; 1669 browlengths[i] = 2*s_browlengths[i]; 1670 1671 /* zero out the mask elements we set */ 1672 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1673 } 1674 1675 /* create our matrix */ 1676 ierr = MatCreateSeqSBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr); 1677 B = *A; 1678 a = (Mat_SeqSBAIJ*)B->data; 1679 1680 /* set matrix "i" values */ 1681 a->i[0] = 0; 1682 for (i=1; i<= mbs; i++) { 1683 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1684 a->ilen[i-1] = s_browlengths[i-1]; 1685 } 1686 a->s_nz = 0; 1687 for (i=0; i<mbs; i++) a->s_nz += s_browlengths[i]; 1688 1689 /* read in nonzero values */ 1690 aa = (Scalar*)PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 1691 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1692 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1693 1694 /* set "a" and "j" values into matrix */ 1695 nzcount = 0; jcount = 0; 1696 for (i=0; i<mbs; i++) { 1697 nzcountb = nzcount; 1698 nmask = 0; 1699 for (j=0; j<bs; j++) { 1700 kmax = rowlengths[i*bs+j]; 1701 for (k=0; k<kmax; k++) { 1702 tmp = jj[nzcount++]/bs; /* block col. index */ 1703 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1704 } 1705 rowcount++; 1706 } 1707 /* sort the masked values */ 1708 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1709 1710 /* set "j" values into matrix */ 1711 maskcount = 1; 1712 for (j=0; j<nmask; j++) { 1713 a->j[jcount++] = masked[j]; 1714 mask[masked[j]] = maskcount++; 1715 } 1716 1717 /* set "a" values into matrix */ 1718 ishift = bs2*a->i[i]; 1719 for (j=0; j<bs; j++) { 1720 kmax = rowlengths[i*bs+j]; 1721 for (k=0; k<kmax; k++) { 1722 tmp = jj[nzcountb]/bs ; /* block col. index */ 1723 if (tmp >= i){ 1724 block = mask[tmp] - 1; 1725 point = jj[nzcountb] - bs*tmp; 1726 idx = ishift + bs2*block + j + bs*point; 1727 a->a[idx] = aa[nzcountb]; 1728 } 1729 nzcountb++; 1730 } 1731 } 1732 /* zero out the mask elements we set */ 1733 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1734 } 1735 if (jcount != a->s_nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1736 1737 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1738 ierr = PetscFree(browlengths);CHKERRQ(ierr); 1739 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1740 ierr = PetscFree(aa);CHKERRQ(ierr); 1741 ierr = PetscFree(jj);CHKERRQ(ierr); 1742 ierr = PetscFree(mask);CHKERRQ(ierr); 1743 1744 B->assembled = PETSC_TRUE; 1745 ierr = MatView_Private(B);CHKERRQ(ierr); 1746 PetscFunctionReturn(0); 1747 } 1748 1749 1750 1751 1752