1 /*$Id: sbaij.c,v 1.62 2001/08/07 03:03:01 balay Exp $*/ 2 3 /* 4 Defines the basic matrix operations for the SBAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 8 #include "src/vec/vecimpl.h" 9 #include "src/inline/spops.h" 10 #include "src/mat/impls/sbaij/seq/sbaij.h" 11 12 #define CHUNKSIZE 10 13 14 /* 15 Checks for missing diagonals 16 */ 17 #undef __FUNCT__ 18 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 19 int MatMissingDiagonal_SeqSBAIJ(Mat A) 20 { 21 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 22 int *diag,*jj = a->j,i,ierr; 23 24 PetscFunctionBegin; 25 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 26 diag = a->diag; 27 for (i=0; i<a->mbs; i++) { 28 if (jj[diag[i]] != i) SETERRQ1(1,"Matrix is missing diagonal number %d",i); 29 } 30 PetscFunctionReturn(0); 31 } 32 33 #undef __FUNCT__ 34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 35 int MatMarkDiagonal_SeqSBAIJ(Mat A) 36 { 37 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 38 int i,mbs = a->mbs,ierr; 39 40 PetscFunctionBegin; 41 if (a->diag) PetscFunctionReturn(0); 42 43 ierr = PetscMalloc((mbs+1)*sizeof(int),&a->diag);CHKERRQ(ierr); 44 PetscLogObjectMemory(A,(mbs+1)*sizeof(int)); 45 for (i=0; i<mbs; i++) a->diag[i] = a->i[i]; 46 PetscFunctionReturn(0); 47 } 48 49 #undef __FUNCT__ 50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 51 static int MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done) 52 { 53 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 54 int n = a->mbs,i; 55 56 PetscFunctionBegin; 57 *nn = n; 58 if (!ia) PetscFunctionReturn(0); 59 60 if (oshift == 1) { 61 /* temporarily add 1 to i and j indices */ 62 int s_nz = a->i[n]; 63 for (i=0; i<s_nz; i++) a->j[i]++; 64 for (i=0; i<n+1; i++) a->i[i]++; 65 *ia = a->i; *ja = a->j; 66 } else { 67 *ia = a->i; *ja = a->j; 68 } 69 PetscFunctionReturn(0); 70 } 71 72 #undef __FUNCT__ 73 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 74 static int MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done) 75 { 76 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 77 int i,n = a->mbs; 78 79 PetscFunctionBegin; 80 if (!ia) PetscFunctionReturn(0); 81 82 if (oshift == 1) { 83 int s_nz = a->i[n]-1; 84 for (i=0; i<s_nz; i++) a->j[i]--; 85 for (i=0; i<n+1; i++) a->i[i]--; 86 } 87 PetscFunctionReturn(0); 88 } 89 90 #undef __FUNCT__ 91 #define __FUNCT__ "MatGetBlockSize_SeqSBAIJ" 92 int MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs) 93 { 94 Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data; 95 96 PetscFunctionBegin; 97 *bs = sbaij->bs; 98 PetscFunctionReturn(0); 99 } 100 101 #undef __FUNCT__ 102 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 103 int MatDestroy_SeqSBAIJ(Mat A) 104 { 105 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 106 int ierr; 107 108 PetscFunctionBegin; 109 #if defined(PETSC_USE_LOG) 110 PetscLogObjectState((PetscObject)A,"Rows=%d, s_NZ=%d",A->m,a->s_nz); 111 #endif 112 ierr = PetscFree(a->a);CHKERRQ(ierr); 113 if (!a->singlemalloc) { 114 ierr = PetscFree(a->i);CHKERRQ(ierr); 115 ierr = PetscFree(a->j);CHKERRQ(ierr); 116 } 117 if (a->row) { 118 ierr = ISDestroy(a->row);CHKERRQ(ierr); 119 } 120 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 121 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 122 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 123 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 124 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 125 if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);} 126 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 127 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 128 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 129 130 if (a->inew){ 131 ierr = PetscFree(a->inew);CHKERRQ(ierr); 132 a->inew = 0; 133 } 134 ierr = PetscFree(a);CHKERRQ(ierr); 135 PetscFunctionReturn(0); 136 } 137 138 #undef __FUNCT__ 139 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 140 int MatSetOption_SeqSBAIJ(Mat A,MatOption op) 141 { 142 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 143 144 PetscFunctionBegin; 145 switch (op) { 146 case MAT_ROW_ORIENTED: 147 a->roworiented = PETSC_TRUE; 148 break; 149 case MAT_COLUMN_ORIENTED: 150 a->roworiented = PETSC_FALSE; 151 break; 152 case MAT_COLUMNS_SORTED: 153 a->sorted = PETSC_TRUE; 154 break; 155 case MAT_COLUMNS_UNSORTED: 156 a->sorted = PETSC_FALSE; 157 break; 158 case MAT_KEEP_ZEROED_ROWS: 159 a->keepzeroedrows = PETSC_TRUE; 160 break; 161 case MAT_NO_NEW_NONZERO_LOCATIONS: 162 a->nonew = 1; 163 break; 164 case MAT_NEW_NONZERO_LOCATION_ERR: 165 a->nonew = -1; 166 break; 167 case MAT_NEW_NONZERO_ALLOCATION_ERR: 168 a->nonew = -2; 169 break; 170 case MAT_YES_NEW_NONZERO_LOCATIONS: 171 a->nonew = 0; 172 break; 173 case MAT_ROWS_SORTED: 174 case MAT_ROWS_UNSORTED: 175 case MAT_YES_NEW_DIAGONALS: 176 case MAT_IGNORE_OFF_PROC_ENTRIES: 177 case MAT_USE_HASH_TABLE: 178 PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n"); 179 break; 180 case MAT_NO_NEW_DIAGONALS: 181 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 182 default: 183 SETERRQ(PETSC_ERR_SUP,"unknown option"); 184 } 185 PetscFunctionReturn(0); 186 } 187 188 #undef __FUNCT__ 189 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 190 int MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,PetscScalar **v) 191 { 192 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 193 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2,ierr; 194 MatScalar *aa,*aa_i; 195 PetscScalar *v_i; 196 197 PetscFunctionBegin; 198 bs = a->bs; 199 ai = a->i; 200 aj = a->j; 201 aa = a->a; 202 bs2 = a->bs2; 203 204 if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range"); 205 206 bn = row/bs; /* Block number */ 207 bp = row % bs; /* Block position */ 208 M = ai[bn+1] - ai[bn]; 209 *ncols = bs*M; 210 211 if (v) { 212 *v = 0; 213 if (*ncols) { 214 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 215 for (i=0; i<M; i++) { /* for each block in the block row */ 216 v_i = *v + i*bs; 217 aa_i = aa + bs2*(ai[bn] + i); 218 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 219 } 220 } 221 } 222 223 if (cols) { 224 *cols = 0; 225 if (*ncols) { 226 ierr = PetscMalloc((*ncols+row)*sizeof(int),cols);CHKERRQ(ierr); 227 for (i=0; i<M; i++) { /* for each block in the block row */ 228 cols_i = *cols + i*bs; 229 itmp = bs*aj[ai[bn] + i]; 230 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 231 } 232 } 233 } 234 235 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 236 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 237 #ifdef column_search 238 v_i = *v + M*bs; 239 cols_i = *cols + M*bs; 240 for (i=0; i<bn; i++){ /* for each block row */ 241 M = ai[i+1] - ai[i]; 242 for (j=0; j<M; j++){ 243 itmp = aj[ai[i] + j]; /* block column value */ 244 if (itmp == bn){ 245 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 246 for (k=0; k<bs; k++) { 247 *cols_i++ = i*bs+k; 248 *v_i++ = aa_i[k]; 249 } 250 *ncols += bs; 251 break; 252 } 253 } 254 } 255 #endif 256 257 PetscFunctionReturn(0); 258 } 259 260 #undef __FUNCT__ 261 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 262 int MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 263 { 264 int ierr; 265 266 PetscFunctionBegin; 267 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 268 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 269 PetscFunctionReturn(0); 270 } 271 272 #undef __FUNCT__ 273 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 274 int MatTranspose_SeqSBAIJ(Mat A,Mat *B) 275 { 276 int ierr; 277 PetscFunctionBegin; 278 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 279 PetscFunctionReturn(0); 280 } 281 282 #undef __FUNCT__ 283 #define __FUNCT__ "MatView_SeqSBAIJ_Binary" 284 static int MatView_SeqSBAIJ_Binary(Mat A,PetscViewer viewer) 285 { 286 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 287 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 288 PetscScalar *aa; 289 FILE *file; 290 291 PetscFunctionBegin; 292 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 293 ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 294 col_lens[0] = MAT_FILE_COOKIE; 295 296 col_lens[1] = A->m; 297 col_lens[2] = A->m; 298 col_lens[3] = a->s_nz*bs2; 299 300 /* store lengths of each row and write (including header) to file */ 301 count = 0; 302 for (i=0; i<a->mbs; i++) { 303 for (j=0; j<bs; j++) { 304 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 305 } 306 } 307 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 308 ierr = PetscFree(col_lens);CHKERRQ(ierr); 309 310 /* store column indices (zero start index) */ 311 ierr = PetscMalloc((a->s_nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 312 count = 0; 313 for (i=0; i<a->mbs; i++) { 314 for (j=0; j<bs; j++) { 315 for (k=a->i[i]; k<a->i[i+1]; k++) { 316 for (l=0; l<bs; l++) { 317 jj[count++] = bs*a->j[k] + l; 318 } 319 } 320 } 321 } 322 ierr = PetscBinaryWrite(fd,jj,bs2*a->s_nz,PETSC_INT,0);CHKERRQ(ierr); 323 ierr = PetscFree(jj);CHKERRQ(ierr); 324 325 /* store nonzero values */ 326 ierr = PetscMalloc((a->s_nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 327 count = 0; 328 for (i=0; i<a->mbs; i++) { 329 for (j=0; j<bs; j++) { 330 for (k=a->i[i]; k<a->i[i+1]; k++) { 331 for (l=0; l<bs; l++) { 332 aa[count++] = a->a[bs2*k + l*bs + j]; 333 } 334 } 335 } 336 } 337 ierr = PetscBinaryWrite(fd,aa,bs2*a->s_nz,PETSC_SCALAR,0);CHKERRQ(ierr); 338 ierr = PetscFree(aa);CHKERRQ(ierr); 339 340 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 341 if (file) { 342 fprintf(file,"-matload_block_size %d\n",a->bs); 343 } 344 PetscFunctionReturn(0); 345 } 346 347 #undef __FUNCT__ 348 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 349 static int MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 350 { 351 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 352 int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 353 char *name; 354 PetscViewerFormat format; 355 356 PetscFunctionBegin; 357 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 358 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 359 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 360 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 361 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 362 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 363 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 364 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 365 for (i=0; i<a->mbs; i++) { 366 for (j=0; j<bs; j++) { 367 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 368 for (k=a->i[i]; k<a->i[i+1]; k++) { 369 for (l=0; l<bs; l++) { 370 #if defined(PETSC_USE_COMPLEX) 371 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 372 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 373 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 374 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 375 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 376 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 377 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 378 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 379 } 380 #else 381 if (a->a[bs2*k + l*bs + j] != 0.0) { 382 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 383 } 384 #endif 385 } 386 } 387 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 388 } 389 } 390 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 391 } else { 392 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 393 for (i=0; i<a->mbs; i++) { 394 for (j=0; j<bs; j++) { 395 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 396 for (k=a->i[i]; k<a->i[i+1]; k++) { 397 for (l=0; l<bs; l++) { 398 #if defined(PETSC_USE_COMPLEX) 399 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 400 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 401 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 402 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 403 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 404 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 405 } else { 406 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 407 } 408 #else 409 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 410 #endif 411 } 412 } 413 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 414 } 415 } 416 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 417 } 418 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 419 PetscFunctionReturn(0); 420 } 421 422 #undef __FUNCT__ 423 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 424 static int MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 425 { 426 Mat A = (Mat) Aa; 427 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 428 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank; 429 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 430 MatScalar *aa; 431 MPI_Comm comm; 432 PetscViewer viewer; 433 434 PetscFunctionBegin; 435 /* 436 This is nasty. If this is called from an originally parallel matrix 437 then all processes call this,but only the first has the matrix so the 438 rest should return immediately. 439 */ 440 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 441 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 442 if (rank) PetscFunctionReturn(0); 443 444 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 445 446 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 447 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 448 449 /* loop over matrix elements drawing boxes */ 450 color = PETSC_DRAW_BLUE; 451 for (i=0,row=0; i<mbs; i++,row+=bs) { 452 for (j=a->i[i]; j<a->i[i+1]; j++) { 453 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 454 x_l = a->j[j]*bs; x_r = x_l + 1.0; 455 aa = a->a + j*bs2; 456 for (k=0; k<bs; k++) { 457 for (l=0; l<bs; l++) { 458 if (PetscRealPart(*aa++) >= 0.) continue; 459 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 460 } 461 } 462 } 463 } 464 color = PETSC_DRAW_CYAN; 465 for (i=0,row=0; i<mbs; i++,row+=bs) { 466 for (j=a->i[i]; j<a->i[i+1]; j++) { 467 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 468 x_l = a->j[j]*bs; x_r = x_l + 1.0; 469 aa = a->a + j*bs2; 470 for (k=0; k<bs; k++) { 471 for (l=0; l<bs; l++) { 472 if (PetscRealPart(*aa++) != 0.) continue; 473 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 474 } 475 } 476 } 477 } 478 479 color = PETSC_DRAW_RED; 480 for (i=0,row=0; i<mbs; i++,row+=bs) { 481 for (j=a->i[i]; j<a->i[i+1]; j++) { 482 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 483 x_l = a->j[j]*bs; x_r = x_l + 1.0; 484 aa = a->a + j*bs2; 485 for (k=0; k<bs; k++) { 486 for (l=0; l<bs; l++) { 487 if (PetscRealPart(*aa++) <= 0.) continue; 488 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 489 } 490 } 491 } 492 } 493 PetscFunctionReturn(0); 494 } 495 496 #undef __FUNCT__ 497 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 498 static int MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 499 { 500 int ierr; 501 PetscReal xl,yl,xr,yr,w,h; 502 PetscDraw draw; 503 PetscTruth isnull; 504 505 PetscFunctionBegin; 506 507 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 508 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 509 510 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 511 xr = A->m; yr = A->m; h = yr/10.0; w = xr/10.0; 512 xr += w; yr += h; xl = -w; yl = -h; 513 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 514 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 515 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 516 PetscFunctionReturn(0); 517 } 518 519 #undef __FUNCT__ 520 #define __FUNCT__ "MatView_SeqSBAIJ" 521 int MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 522 { 523 int ierr; 524 PetscTruth issocket,isascii,isbinary,isdraw; 525 526 PetscFunctionBegin; 527 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 528 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 529 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 530 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 531 if (issocket) { 532 SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported"); 533 } else if (isascii){ 534 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 535 } else if (isbinary) { 536 ierr = MatView_SeqSBAIJ_Binary(A,viewer);CHKERRQ(ierr); 537 } else if (isdraw) { 538 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 539 } else { 540 SETERRQ1(1,"Viewer type %s not supported by SeqSBAIJ matrices",((PetscObject)viewer)->type_name); 541 } 542 PetscFunctionReturn(0); 543 } 544 545 546 #undef __FUNCT__ 547 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 548 int MatGetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[]) 549 { 550 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 551 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 552 int *ai = a->i,*ailen = a->ilen; 553 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 554 MatScalar *ap,*aa = a->a,zero = 0.0; 555 556 PetscFunctionBegin; 557 for (k=0; k<m; k++) { /* loop over rows */ 558 row = im[k]; brow = row/bs; 559 if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",row); 560 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1); 561 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 562 nrow = ailen[brow]; 563 for (l=0; l<n; l++) { /* loop over columns */ 564 if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",in[l]); 565 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n-1); 566 col = in[l] ; 567 bcol = col/bs; 568 cidx = col%bs; 569 ridx = row%bs; 570 high = nrow; 571 low = 0; /* assume unsorted */ 572 while (high-low > 5) { 573 t = (low+high)/2; 574 if (rp[t] > bcol) high = t; 575 else low = t; 576 } 577 for (i=low; i<high; i++) { 578 if (rp[i] > bcol) break; 579 if (rp[i] == bcol) { 580 *v++ = ap[bs2*i+bs*cidx+ridx]; 581 goto finished; 582 } 583 } 584 *v++ = zero; 585 finished:; 586 } 587 } 588 PetscFunctionReturn(0); 589 } 590 591 592 #undef __FUNCT__ 593 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 594 int MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is) 595 { 596 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 597 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 598 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 599 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 600 PetscTruth roworiented=a->roworiented; 601 const MatScalar *value = v; 602 MatScalar *ap,*aa = a->a,*bap; 603 604 PetscFunctionBegin; 605 if (roworiented) { 606 stepval = (n-1)*bs; 607 } else { 608 stepval = (m-1)*bs; 609 } 610 for (k=0; k<m; k++) { /* loop over added rows */ 611 row = im[k]; 612 if (row < 0) continue; 613 #if defined(PETSC_USE_BOPT_g) 614 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,a->mbs-1); 615 #endif 616 rp = aj + ai[row]; 617 ap = aa + bs2*ai[row]; 618 rmax = imax[row]; 619 nrow = ailen[row]; 620 low = 0; 621 for (l=0; l<n; l++) { /* loop over added columns */ 622 if (in[l] < 0) continue; 623 #if defined(PETSC_USE_BOPT_g) 624 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",col,a->nbs-1); 625 #endif 626 col = in[l]; 627 if (col < row) continue; /* ignore lower triangular block */ 628 if (roworiented) { 629 value = v + k*(stepval+bs)*bs + l*bs; 630 } else { 631 value = v + l*(stepval+bs)*bs + k*bs; 632 } 633 if (!sorted) low = 0; high = nrow; 634 while (high-low > 7) { 635 t = (low+high)/2; 636 if (rp[t] > col) high = t; 637 else low = t; 638 } 639 for (i=low; i<high; i++) { 640 if (rp[i] > col) break; 641 if (rp[i] == col) { 642 bap = ap + bs2*i; 643 if (roworiented) { 644 if (is == ADD_VALUES) { 645 for (ii=0; ii<bs; ii++,value+=stepval) { 646 for (jj=ii; jj<bs2; jj+=bs) { 647 bap[jj] += *value++; 648 } 649 } 650 } else { 651 for (ii=0; ii<bs; ii++,value+=stepval) { 652 for (jj=ii; jj<bs2; jj+=bs) { 653 bap[jj] = *value++; 654 } 655 } 656 } 657 } else { 658 if (is == ADD_VALUES) { 659 for (ii=0; ii<bs; ii++,value+=stepval) { 660 for (jj=0; jj<bs; jj++) { 661 *bap++ += *value++; 662 } 663 } 664 } else { 665 for (ii=0; ii<bs; ii++,value+=stepval) { 666 for (jj=0; jj<bs; jj++) { 667 *bap++ = *value++; 668 } 669 } 670 } 671 } 672 goto noinsert2; 673 } 674 } 675 if (nonew == 1) goto noinsert2; 676 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 677 if (nrow >= rmax) { 678 /* there is no extra room in row, therefore enlarge */ 679 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 680 MatScalar *new_a; 681 682 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 683 684 /* malloc new storage space */ 685 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 686 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 687 new_j = (int*)(new_a + bs2*new_nz); 688 new_i = new_j + new_nz; 689 690 /* copy over old data into new slots */ 691 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 692 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 693 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 694 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 695 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 696 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 697 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 698 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 699 /* free up old matrix storage */ 700 ierr = PetscFree(a->a);CHKERRQ(ierr); 701 if (!a->singlemalloc) { 702 ierr = PetscFree(a->i);CHKERRQ(ierr); 703 ierr = PetscFree(a->j);CHKERRQ(ierr); 704 } 705 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 706 a->singlemalloc = PETSC_TRUE; 707 708 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 709 rmax = imax[row] = imax[row] + CHUNKSIZE; 710 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 711 a->s_maxnz += bs2*CHUNKSIZE; 712 a->reallocs++; 713 a->s_nz++; 714 } 715 N = nrow++ - 1; 716 /* shift up all the later entries in this row */ 717 for (ii=N; ii>=i; ii--) { 718 rp[ii+1] = rp[ii]; 719 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 720 } 721 if (N >= i) { 722 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 723 } 724 rp[i] = col; 725 bap = ap + bs2*i; 726 if (roworiented) { 727 for (ii=0; ii<bs; ii++,value+=stepval) { 728 for (jj=ii; jj<bs2; jj+=bs) { 729 bap[jj] = *value++; 730 } 731 } 732 } else { 733 for (ii=0; ii<bs; ii++,value+=stepval) { 734 for (jj=0; jj<bs; jj++) { 735 *bap++ = *value++; 736 } 737 } 738 } 739 noinsert2:; 740 low = i; 741 } 742 ailen[row] = nrow; 743 } 744 PetscFunctionReturn(0); 745 } 746 747 #undef __FUNCT__ 748 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 749 int MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 750 { 751 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 752 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 753 int m = A->m,*ip,N,*ailen = a->ilen; 754 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 755 MatScalar *aa = a->a,*ap; 756 #if defined(PETSC_HAVE_MUMPS) 757 PetscTruth flag; 758 #endif 759 760 PetscFunctionBegin; 761 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 762 763 if (m) rmax = ailen[0]; 764 for (i=1; i<mbs; i++) { 765 /* move each row back by the amount of empty slots (fshift) before it*/ 766 fshift += imax[i-1] - ailen[i-1]; 767 rmax = PetscMax(rmax,ailen[i]); 768 if (fshift) { 769 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 770 N = ailen[i]; 771 for (j=0; j<N; j++) { 772 ip[j-fshift] = ip[j]; 773 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 774 } 775 } 776 ai[i] = ai[i-1] + ailen[i-1]; 777 } 778 if (mbs) { 779 fshift += imax[mbs-1] - ailen[mbs-1]; 780 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 781 } 782 /* reset ilen and imax for each row */ 783 for (i=0; i<mbs; i++) { 784 ailen[i] = imax[i] = ai[i+1] - ai[i]; 785 } 786 a->s_nz = ai[mbs]; 787 788 /* diagonals may have moved, reset it */ 789 if (a->diag) { 790 ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(int));CHKERRQ(ierr); 791 } 792 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 793 m,A->m,a->bs,fshift*bs2,a->s_nz*bs2); 794 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %d\n", 795 a->reallocs); 796 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 797 a->reallocs = 0; 798 A->info.nz_unneeded = (PetscReal)fshift*bs2; 799 800 #if defined(PETSC_HAVE_MUMPS) 801 ierr = PetscOptionsHasName(A->prefix,"-mat_sbaij_mumps",&flag);CHKERRQ(ierr); 802 if (flag) { ierr = MatUseMUMPS_MPIAIJ(A);CHKERRQ(ierr); } 803 #endif 804 805 PetscFunctionReturn(0); 806 } 807 808 /* 809 This function returns an array of flags which indicate the locations of contiguous 810 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 811 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 812 Assume: sizes should be long enough to hold all the values. 813 */ 814 #undef __FUNCT__ 815 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 816 int MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 817 { 818 int i,j,k,row; 819 PetscTruth flg; 820 821 PetscFunctionBegin; 822 for (i=0,j=0; i<n; j++) { 823 row = idx[i]; 824 if (row%bs!=0) { /* Not the begining of a block */ 825 sizes[j] = 1; 826 i++; 827 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 828 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 829 i++; 830 } else { /* Begining of the block, so check if the complete block exists */ 831 flg = PETSC_TRUE; 832 for (k=1; k<bs; k++) { 833 if (row+k != idx[i+k]) { /* break in the block */ 834 flg = PETSC_FALSE; 835 break; 836 } 837 } 838 if (flg == PETSC_TRUE) { /* No break in the bs */ 839 sizes[j] = bs; 840 i+= bs; 841 } else { 842 sizes[j] = 1; 843 i++; 844 } 845 } 846 } 847 *bs_max = j; 848 PetscFunctionReturn(0); 849 } 850 851 #undef __FUNCT__ 852 #define __FUNCT__ "MatZeroRows_SeqSBAIJ" 853 int MatZeroRows_SeqSBAIJ(Mat A,IS is,const PetscScalar *diag) 854 { 855 PetscFunctionBegin; 856 SETERRQ(PETSC_ERR_SUP,"No support for this function yet"); 857 } 858 859 /* Only add/insert a(i,j) with i<=j (blocks). 860 Any a(i,j) with i>j input by user is ingored. 861 */ 862 863 #undef __FUNCT__ 864 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 865 int MatSetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is) 866 { 867 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 868 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 869 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 870 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 871 int ridx,cidx,bs2=a->bs2,ierr; 872 MatScalar *ap,value,*aa=a->a,*bap; 873 874 PetscFunctionBegin; 875 876 for (k=0; k<m; k++) { /* loop over added rows */ 877 row = im[k]; /* row number */ 878 brow = row/bs; /* block row number */ 879 if (row < 0) continue; 880 #if defined(PETSC_USE_BOPT_g) 881 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1); 882 #endif 883 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 884 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 885 rmax = imax[brow]; /* maximum space allocated for this row */ 886 nrow = ailen[brow]; /* actual length of this row */ 887 low = 0; 888 889 for (l=0; l<n; l++) { /* loop over added columns */ 890 if (in[l] < 0) continue; 891 #if defined(PETSC_USE_BOPT_g) 892 if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m-1); 893 #endif 894 col = in[l]; 895 bcol = col/bs; /* block col number */ 896 897 if (brow <= bcol){ 898 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 899 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 900 /* element value a(k,l) */ 901 if (roworiented) { 902 value = v[l + k*n]; 903 } else { 904 value = v[k + l*m]; 905 } 906 907 /* move pointer bap to a(k,l) quickly and add/insert value */ 908 if (!sorted) low = 0; high = nrow; 909 while (high-low > 7) { 910 t = (low+high)/2; 911 if (rp[t] > bcol) high = t; 912 else low = t; 913 } 914 for (i=low; i<high; i++) { 915 /* printf("The loop of i=low.., rp[%d]=%d\n",i,rp[i]); */ 916 if (rp[i] > bcol) break; 917 if (rp[i] == bcol) { 918 bap = ap + bs2*i + bs*cidx + ridx; 919 if (is == ADD_VALUES) *bap += value; 920 else *bap = value; 921 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 922 if (brow == bcol && ridx < cidx){ 923 bap = ap + bs2*i + bs*ridx + cidx; 924 if (is == ADD_VALUES) *bap += value; 925 else *bap = value; 926 } 927 goto noinsert1; 928 } 929 } 930 931 if (nonew == 1) goto noinsert1; 932 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 933 if (nrow >= rmax) { 934 /* there is no extra room in row, therefore enlarge */ 935 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 936 MatScalar *new_a; 937 938 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 939 940 /* Malloc new storage space */ 941 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 942 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 943 new_j = (int*)(new_a + bs2*new_nz); 944 new_i = new_j + new_nz; 945 946 /* copy over old data into new slots */ 947 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 948 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 949 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 950 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 951 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 952 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 953 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 954 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 955 /* free up old matrix storage */ 956 ierr = PetscFree(a->a);CHKERRQ(ierr); 957 if (!a->singlemalloc) { 958 ierr = PetscFree(a->i);CHKERRQ(ierr); 959 ierr = PetscFree(a->j);CHKERRQ(ierr); 960 } 961 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 962 a->singlemalloc = PETSC_TRUE; 963 964 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 965 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 966 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 967 a->s_maxnz += bs2*CHUNKSIZE; 968 a->reallocs++; 969 a->s_nz++; 970 } 971 972 N = nrow++ - 1; 973 /* shift up all the later entries in this row */ 974 for (ii=N; ii>=i; ii--) { 975 rp[ii+1] = rp[ii]; 976 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 977 } 978 if (N>=i) { 979 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 980 } 981 rp[i] = bcol; 982 ap[bs2*i + bs*cidx + ridx] = value; 983 noinsert1:; 984 low = i; 985 /* } */ 986 } 987 } /* end of if .. if.. */ 988 } /* end of loop over added columns */ 989 ailen[brow] = nrow; 990 } /* end of loop over added rows */ 991 992 PetscFunctionReturn(0); 993 } 994 995 extern int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*); 996 extern int MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*); 997 extern int MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS*,int); 998 extern int MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*); 999 extern int MatGetSubMatrices_SeqSBAIJ(Mat,int,const IS[],const IS[],MatReuse,Mat*[]); 1000 extern int MatMultTranspose_SeqSBAIJ(Mat,Vec,Vec); 1001 extern int MatMultTransposeAdd_SeqSBAIJ(Mat,Vec,Vec,Vec); 1002 extern int MatScale_SeqSBAIJ(const PetscScalar*,Mat); 1003 extern int MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *); 1004 extern int MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*); 1005 extern int MatGetDiagonal_SeqSBAIJ(Mat,Vec); 1006 extern int MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec); 1007 extern int MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *); 1008 extern int MatZeroEntries_SeqSBAIJ(Mat); 1009 extern int MatGetRowMax_SeqSBAIJ(Mat,Vec); 1010 1011 extern int MatSolve_SeqSBAIJ_N(Mat,Vec,Vec); 1012 extern int MatSolve_SeqSBAIJ_1(Mat,Vec,Vec); 1013 extern int MatSolve_SeqSBAIJ_2(Mat,Vec,Vec); 1014 extern int MatSolve_SeqSBAIJ_3(Mat,Vec,Vec); 1015 extern int MatSolve_SeqSBAIJ_4(Mat,Vec,Vec); 1016 extern int MatSolve_SeqSBAIJ_5(Mat,Vec,Vec); 1017 extern int MatSolve_SeqSBAIJ_6(Mat,Vec,Vec); 1018 extern int MatSolve_SeqSBAIJ_7(Mat,Vec,Vec); 1019 extern int MatSolveTranspose_SeqSBAIJ_7(Mat,Vec,Vec); 1020 extern int MatSolveTranspose_SeqSBAIJ_6(Mat,Vec,Vec); 1021 extern int MatSolveTranspose_SeqSBAIJ_5(Mat,Vec,Vec); 1022 extern int MatSolveTranspose_SeqSBAIJ_4(Mat,Vec,Vec); 1023 extern int MatSolveTranspose_SeqSBAIJ_3(Mat,Vec,Vec); 1024 extern int MatSolveTranspose_SeqSBAIJ_2(Mat,Vec,Vec); 1025 extern int MatSolveTranspose_SeqSBAIJ_1(Mat,Vec,Vec); 1026 1027 extern int MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs); 1028 1029 extern int MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec); 1030 extern int MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec); 1031 extern int MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec); 1032 extern int MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 1033 extern int MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec); 1034 extern int MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec); 1035 extern int MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec); 1036 extern int MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec); 1037 1038 extern int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*); 1039 extern int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*); 1040 extern int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*); 1041 extern int MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*); 1042 extern int MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*); 1043 extern int MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*); 1044 extern int MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*); 1045 extern int MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*); 1046 extern int MatGetInertia_SeqSBAIJ(Mat,int*,int*,int*); 1047 1048 extern int MatMult_SeqSBAIJ_1(Mat,Vec,Vec); 1049 extern int MatMult_SeqSBAIJ_2(Mat,Vec,Vec); 1050 extern int MatMult_SeqSBAIJ_3(Mat,Vec,Vec); 1051 extern int MatMult_SeqSBAIJ_4(Mat,Vec,Vec); 1052 extern int MatMult_SeqSBAIJ_5(Mat,Vec,Vec); 1053 extern int MatMult_SeqSBAIJ_6(Mat,Vec,Vec); 1054 extern int MatMult_SeqSBAIJ_7(Mat,Vec,Vec); 1055 extern int MatMult_SeqSBAIJ_N(Mat,Vec,Vec); 1056 1057 extern int MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec); 1058 extern int MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec); 1059 extern int MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec); 1060 extern int MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec); 1061 extern int MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec); 1062 extern int MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec); 1063 extern int MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec); 1064 extern int MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec); 1065 1066 #ifdef HAVE_MatICCFactor 1067 /* modefied from MatILUFactor_SeqSBAIJ, needs further work! */ 1068 #undef __FUNCT__ 1069 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 1070 int MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info) 1071 { 1072 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 1073 Mat outA; 1074 int ierr; 1075 PetscTruth row_identity,col_identity; 1076 1077 PetscFunctionBegin; 1078 /* 1079 if (level != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1080 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1081 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1082 if (!row_identity || !col_identity) { 1083 SETERRQ(1,"Row and column permutations must be identity for in-place ICC"); 1084 } 1085 */ 1086 1087 outA = inA; 1088 inA->factor = FACTOR_CHOLESKY; 1089 1090 if (!a->diag) { 1091 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 1092 } 1093 /* 1094 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1095 for ILU(0) factorization with natural ordering 1096 */ 1097 switch (a->bs) { 1098 case 1: 1099 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1100 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1101 inA->ops->solves = MatSolves_SeqSBAIJ_1; 1102 PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n"); 1103 case 2: 1104 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 1105 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1106 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1107 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 1108 break; 1109 case 3: 1110 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 1111 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1112 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1113 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n"); 1114 break; 1115 case 4: 1116 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 1117 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1118 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1119 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1120 break; 1121 case 5: 1122 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 1123 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1124 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1125 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1126 break; 1127 case 6: 1128 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 1129 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1130 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1131 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1132 break; 1133 case 7: 1134 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 1135 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1136 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1137 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1138 break; 1139 default: 1140 a->row = row; 1141 a->icol = col; 1142 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1143 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1144 1145 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1146 ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr); 1147 PetscLogObjectParent(inA,a->icol); 1148 1149 if (!a->solve_work) { 1150 ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1151 PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar)); 1152 } 1153 } 1154 1155 ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr); 1156 1157 PetscFunctionReturn(0); 1158 } 1159 #endif 1160 1161 #undef __FUNCT__ 1162 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ" 1163 int MatPrintHelp_SeqSBAIJ(Mat A) 1164 { 1165 static PetscTruth called = PETSC_FALSE; 1166 MPI_Comm comm = A->comm; 1167 int ierr; 1168 1169 PetscFunctionBegin; 1170 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1171 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1172 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1173 PetscFunctionReturn(0); 1174 } 1175 1176 EXTERN_C_BEGIN 1177 #undef __FUNCT__ 1178 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1179 int MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices) 1180 { 1181 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1182 int i,nz,n; 1183 1184 PetscFunctionBegin; 1185 nz = baij->s_maxnz; 1186 n = mat->n; 1187 for (i=0; i<nz; i++) { 1188 baij->j[i] = indices[i]; 1189 } 1190 baij->s_nz = nz; 1191 for (i=0; i<n; i++) { 1192 baij->ilen[i] = baij->imax[i]; 1193 } 1194 1195 PetscFunctionReturn(0); 1196 } 1197 EXTERN_C_END 1198 1199 #undef __FUNCT__ 1200 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1201 /*@ 1202 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1203 in the matrix. 1204 1205 Input Parameters: 1206 + mat - the SeqSBAIJ matrix 1207 - indices - the column indices 1208 1209 Level: advanced 1210 1211 Notes: 1212 This can be called if you have precomputed the nonzero structure of the 1213 matrix and want to provide it to the matrix object to improve the performance 1214 of the MatSetValues() operation. 1215 1216 You MUST have set the correct numbers of nonzeros per row in the call to 1217 MatCreateSeqSBAIJ(). 1218 1219 MUST be called before any calls to MatSetValues() 1220 1221 .seealso: MatCreateSeqSBAIJ 1222 @*/ 1223 int MatSeqSBAIJSetColumnIndices(Mat mat,int *indices) 1224 { 1225 int ierr,(*f)(Mat,int *); 1226 1227 PetscFunctionBegin; 1228 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1229 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1230 if (f) { 1231 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1232 } else { 1233 SETERRQ(1,"Wrong type of matrix to set column indices"); 1234 } 1235 PetscFunctionReturn(0); 1236 } 1237 1238 #undef __FUNCT__ 1239 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1240 int MatSetUpPreallocation_SeqSBAIJ(Mat A) 1241 { 1242 int ierr; 1243 1244 PetscFunctionBegin; 1245 ierr = MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1246 PetscFunctionReturn(0); 1247 } 1248 1249 #undef __FUNCT__ 1250 #define __FUNCT__ "MatGetArray_SeqSBAIJ" 1251 int MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1252 { 1253 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1254 PetscFunctionBegin; 1255 *array = a->a; 1256 PetscFunctionReturn(0); 1257 } 1258 1259 #undef __FUNCT__ 1260 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ" 1261 int MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1262 { 1263 PetscFunctionBegin; 1264 PetscFunctionReturn(0); 1265 } 1266 1267 #include "petscblaslapack.h" 1268 #undef __FUNCT__ 1269 #define __FUNCT__ "MatAXPY_SeqSBAIJ" 1270 int MatAXPY_SeqSBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str) 1271 { 1272 Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data; 1273 int ierr,one=1,i,bs=y->bs,bs2,j; 1274 1275 PetscFunctionBegin; 1276 if (str == SAME_NONZERO_PATTERN) { 1277 BLaxpy_(&x->s_nz,(PetscScalar*)a,x->a,&one,y->a,&one); 1278 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1279 if (y->xtoy && y->XtoY != X) { 1280 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1281 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1282 } 1283 if (!y->xtoy) { /* get xtoy */ 1284 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1285 y->XtoY = X; 1286 } 1287 bs2 = bs*bs; 1288 for (i=0; i<x->s_nz; i++) { 1289 j = 0; 1290 while (j < bs2){ 1291 y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]); 1292 j++; 1293 } 1294 } 1295 PetscLogInfo(0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %d/%d = %g\n",bs2*x->s_nz,bs2*y->s_nz,(PetscReal)(bs2*x->s_nz)/(bs2*y->s_nz)); 1296 } else { 1297 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1298 } 1299 PetscFunctionReturn(0); 1300 } 1301 1302 /* -------------------------------------------------------------------*/ 1303 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1304 MatGetRow_SeqSBAIJ, 1305 MatRestoreRow_SeqSBAIJ, 1306 MatMult_SeqSBAIJ_N, 1307 MatMultAdd_SeqSBAIJ_N, 1308 MatMultTranspose_SeqSBAIJ, 1309 MatMultTransposeAdd_SeqSBAIJ, 1310 MatSolve_SeqSBAIJ_N, 1311 0, 1312 0, 1313 0, 1314 0, 1315 MatCholeskyFactor_SeqSBAIJ, 1316 MatRelax_SeqSBAIJ, 1317 MatTranspose_SeqSBAIJ, 1318 MatGetInfo_SeqSBAIJ, 1319 MatEqual_SeqSBAIJ, 1320 MatGetDiagonal_SeqSBAIJ, 1321 MatDiagonalScale_SeqSBAIJ, 1322 MatNorm_SeqSBAIJ, 1323 0, 1324 MatAssemblyEnd_SeqSBAIJ, 1325 0, 1326 MatSetOption_SeqSBAIJ, 1327 MatZeroEntries_SeqSBAIJ, 1328 MatZeroRows_SeqSBAIJ, 1329 0, 1330 0, 1331 MatCholeskyFactorSymbolic_SeqSBAIJ, 1332 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1333 MatSetUpPreallocation_SeqSBAIJ, 1334 0, 1335 MatICCFactorSymbolic_SeqSBAIJ, 1336 MatGetArray_SeqSBAIJ, 1337 MatRestoreArray_SeqSBAIJ, 1338 MatDuplicate_SeqSBAIJ, 1339 0, 1340 0, 1341 0, 1342 0, 1343 MatAXPY_SeqSBAIJ, 1344 MatGetSubMatrices_SeqSBAIJ, 1345 MatIncreaseOverlap_SeqSBAIJ, 1346 MatGetValues_SeqSBAIJ, 1347 0, 1348 MatPrintHelp_SeqSBAIJ, 1349 MatScale_SeqSBAIJ, 1350 0, 1351 0, 1352 0, 1353 MatGetBlockSize_SeqSBAIJ, 1354 MatGetRowIJ_SeqSBAIJ, 1355 MatRestoreRowIJ_SeqSBAIJ, 1356 0, 1357 0, 1358 0, 1359 0, 1360 0, 1361 0, 1362 MatSetValuesBlocked_SeqSBAIJ, 1363 MatGetSubMatrix_SeqSBAIJ, 1364 0, 1365 0, 1366 MatGetPetscMaps_Petsc, 1367 0, 1368 0, 1369 0, 1370 0, 1371 0, 1372 0, 1373 MatGetRowMax_SeqSBAIJ, 1374 0, 1375 0, 1376 0, 1377 0, 1378 0, 1379 0, 1380 0, 1381 0, 1382 0, 1383 0, 1384 0, 1385 0, 1386 0, 1387 #if !defined(PETSC_USE_COMPLEX) 1388 MatGetInertia_SeqSBAIJ 1389 #else 1390 0 1391 #endif 1392 }; 1393 1394 EXTERN_C_BEGIN 1395 #undef __FUNCT__ 1396 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1397 int MatStoreValues_SeqSBAIJ(Mat mat) 1398 { 1399 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1400 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1401 int ierr; 1402 1403 PetscFunctionBegin; 1404 if (aij->nonew != 1) { 1405 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1406 } 1407 1408 /* allocate space for values if not already there */ 1409 if (!aij->saved_values) { 1410 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1411 } 1412 1413 /* copy values over */ 1414 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1415 PetscFunctionReturn(0); 1416 } 1417 EXTERN_C_END 1418 1419 EXTERN_C_BEGIN 1420 #undef __FUNCT__ 1421 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1422 int MatRetrieveValues_SeqSBAIJ(Mat mat) 1423 { 1424 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1425 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 1426 1427 PetscFunctionBegin; 1428 if (aij->nonew != 1) { 1429 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1430 } 1431 if (!aij->saved_values) { 1432 SETERRQ(1,"Must call MatStoreValues(A);first"); 1433 } 1434 1435 /* copy values over */ 1436 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1437 PetscFunctionReturn(0); 1438 } 1439 EXTERN_C_END 1440 1441 EXTERN_C_BEGIN 1442 #undef __FUNCT__ 1443 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ" 1444 int MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,int bs,int nz,int *nnz) 1445 { 1446 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1447 int i,len,ierr,mbs,bs2; 1448 PetscTruth flg; 1449 int s_nz; 1450 1451 PetscFunctionBegin; 1452 B->preallocated = PETSC_TRUE; 1453 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1454 mbs = B->m/bs; 1455 bs2 = bs*bs; 1456 1457 if (mbs*bs != B->m) { 1458 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1459 } 1460 1461 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1462 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1463 if (nnz) { 1464 for (i=0; i<mbs; i++) { 1465 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1466 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],mbs); 1467 } 1468 } 1469 1470 ierr = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1471 if (!flg) { 1472 switch (bs) { 1473 case 1: 1474 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1475 B->ops->solve = MatSolve_SeqSBAIJ_1; 1476 B->ops->solves = MatSolves_SeqSBAIJ_1; 1477 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_1; 1478 B->ops->mult = MatMult_SeqSBAIJ_1; 1479 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1480 break; 1481 case 2: 1482 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1483 B->ops->solve = MatSolve_SeqSBAIJ_2; 1484 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_2; 1485 B->ops->mult = MatMult_SeqSBAIJ_2; 1486 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1487 break; 1488 case 3: 1489 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1490 B->ops->solve = MatSolve_SeqSBAIJ_3; 1491 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_3; 1492 B->ops->mult = MatMult_SeqSBAIJ_3; 1493 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1494 break; 1495 case 4: 1496 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1497 B->ops->solve = MatSolve_SeqSBAIJ_4; 1498 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_4; 1499 B->ops->mult = MatMult_SeqSBAIJ_4; 1500 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1501 break; 1502 case 5: 1503 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1504 B->ops->solve = MatSolve_SeqSBAIJ_5; 1505 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_5; 1506 B->ops->mult = MatMult_SeqSBAIJ_5; 1507 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1508 break; 1509 case 6: 1510 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1511 B->ops->solve = MatSolve_SeqSBAIJ_6; 1512 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_6; 1513 B->ops->mult = MatMult_SeqSBAIJ_6; 1514 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1515 break; 1516 case 7: 1517 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1518 B->ops->solve = MatSolve_SeqSBAIJ_7; 1519 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_7; 1520 B->ops->mult = MatMult_SeqSBAIJ_7; 1521 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1522 break; 1523 } 1524 } 1525 1526 b->mbs = mbs; 1527 b->nbs = mbs; 1528 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 1529 if (!nnz) { 1530 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1531 else if (nz <= 0) nz = 1; 1532 for (i=0; i<mbs; i++) { 1533 b->imax[i] = nz; 1534 } 1535 nz = nz*mbs; /* total nz */ 1536 } else { 1537 nz = 0; 1538 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1539 } 1540 /* s_nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1541 s_nz = nz; 1542 1543 /* allocate the matrix space */ 1544 len = s_nz*sizeof(int) + s_nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 1545 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 1546 ierr = PetscMemzero(b->a,s_nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1547 b->j = (int*)(b->a + s_nz*bs2); 1548 ierr = PetscMemzero(b->j,s_nz*sizeof(int));CHKERRQ(ierr); 1549 b->i = b->j + s_nz; 1550 b->singlemalloc = PETSC_TRUE; 1551 1552 /* pointer to beginning of each row */ 1553 b->i[0] = 0; 1554 for (i=1; i<mbs+1; i++) { 1555 b->i[i] = b->i[i-1] + b->imax[i-1]; 1556 } 1557 1558 /* b->ilen will count nonzeros in each block row so far. */ 1559 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 1560 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1561 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1562 1563 b->bs = bs; 1564 b->bs2 = bs2; 1565 b->s_nz = 0; 1566 b->s_maxnz = s_nz*bs2; 1567 1568 b->inew = 0; 1569 b->jnew = 0; 1570 b->anew = 0; 1571 b->a2anew = 0; 1572 b->permute = PETSC_FALSE; 1573 PetscFunctionReturn(0); 1574 } 1575 EXTERN_C_END 1576 1577 EXTERN_C_BEGIN 1578 #undef __FUNCT__ 1579 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1580 int MatCreate_SeqSBAIJ(Mat B) 1581 { 1582 Mat_SeqSBAIJ *b; 1583 int ierr,size; 1584 1585 PetscFunctionBegin; 1586 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1587 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1588 B->m = B->M = PetscMax(B->m,B->M); 1589 B->n = B->N = PetscMax(B->n,B->N); 1590 1591 ierr = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1592 B->data = (void*)b; 1593 ierr = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr); 1594 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1595 B->ops->destroy = MatDestroy_SeqSBAIJ; 1596 B->ops->view = MatView_SeqSBAIJ; 1597 B->factor = 0; 1598 B->lupivotthreshold = 1.0; 1599 B->mapping = 0; 1600 b->row = 0; 1601 b->icol = 0; 1602 b->reallocs = 0; 1603 b->saved_values = 0; 1604 1605 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1606 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1607 1608 b->sorted = PETSC_FALSE; 1609 b->roworiented = PETSC_TRUE; 1610 b->nonew = 0; 1611 b->diag = 0; 1612 b->solve_work = 0; 1613 b->mult_work = 0; 1614 B->spptr = 0; 1615 b->keepzeroedrows = PETSC_FALSE; 1616 b->xtoy = 0; 1617 b->XtoY = 0; 1618 1619 b->inew = 0; 1620 b->jnew = 0; 1621 b->anew = 0; 1622 b->a2anew = 0; 1623 b->permute = PETSC_FALSE; 1624 1625 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1626 "MatStoreValues_SeqSBAIJ", 1627 MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1628 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1629 "MatRetrieveValues_SeqSBAIJ", 1630 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1631 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1632 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1633 MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1634 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C", 1635 "MatSeqSBAIJSetPreallocation_SeqSBAIJ", 1636 MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr); 1637 PetscFunctionReturn(0); 1638 } 1639 EXTERN_C_END 1640 1641 #undef __FUNCT__ 1642 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1643 /*@C 1644 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1645 compressed row) format. For good matrix assembly performance the 1646 user should preallocate the matrix storage by setting the parameter nz 1647 (or the array nnz). By setting these parameters accurately, performance 1648 during matrix assembly can be increased by more than a factor of 50. 1649 1650 Collective on Mat 1651 1652 Input Parameters: 1653 + A - the symmetric matrix 1654 . bs - size of block 1655 . nz - number of block nonzeros per block row (same for all rows) 1656 - nnz - array containing the number of block nonzeros in the upper triangular plus 1657 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1658 1659 Options Database Keys: 1660 . -mat_no_unroll - uses code that does not unroll the loops in the 1661 block calculations (much slower) 1662 . -mat_block_size - size of the blocks to use 1663 1664 Level: intermediate 1665 1666 Notes: 1667 Specify the preallocated storage with either nz or nnz (not both). 1668 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1669 allocation. For additional details, see the users manual chapter on 1670 matrices. 1671 1672 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1673 @*/ 1674 int MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[]) { 1675 int ierr,(*f)(Mat,int,int,const int[]); 1676 1677 PetscFunctionBegin; 1678 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1679 if (f) { 1680 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 1681 } 1682 PetscFunctionReturn(0); 1683 } 1684 1685 #undef __FUNCT__ 1686 #define __FUNCT__ "MatCreateSeqSBAIJ" 1687 /*@C 1688 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1689 compressed row) format. For good matrix assembly performance the 1690 user should preallocate the matrix storage by setting the parameter nz 1691 (or the array nnz). By setting these parameters accurately, performance 1692 during matrix assembly can be increased by more than a factor of 50. 1693 1694 Collective on MPI_Comm 1695 1696 Input Parameters: 1697 + comm - MPI communicator, set to PETSC_COMM_SELF 1698 . bs - size of block 1699 . m - number of rows, or number of columns 1700 . nz - number of block nonzeros per block row (same for all rows) 1701 - nnz - array containing the number of block nonzeros in the upper triangular plus 1702 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1703 1704 Output Parameter: 1705 . A - the symmetric matrix 1706 1707 Options Database Keys: 1708 . -mat_no_unroll - uses code that does not unroll the loops in the 1709 block calculations (much slower) 1710 . -mat_block_size - size of the blocks to use 1711 1712 Level: intermediate 1713 1714 Notes: 1715 1716 Specify the preallocated storage with either nz or nnz (not both). 1717 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1718 allocation. For additional details, see the users manual chapter on 1719 matrices. 1720 1721 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1722 @*/ 1723 int MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A) 1724 { 1725 int ierr; 1726 1727 PetscFunctionBegin; 1728 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1729 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1730 ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1731 PetscFunctionReturn(0); 1732 } 1733 1734 #undef __FUNCT__ 1735 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1736 int MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1737 { 1738 Mat C; 1739 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1740 int i,len,mbs = a->mbs,nz = a->s_nz,bs2 =a->bs2,ierr; 1741 1742 PetscFunctionBegin; 1743 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1744 1745 *B = 0; 1746 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1747 ierr = MatSetType(C,MATSEQSBAIJ);CHKERRQ(ierr); 1748 c = (Mat_SeqSBAIJ*)C->data; 1749 1750 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1751 C->preallocated = PETSC_TRUE; 1752 C->factor = A->factor; 1753 c->row = 0; 1754 c->icol = 0; 1755 c->saved_values = 0; 1756 c->keepzeroedrows = a->keepzeroedrows; 1757 C->assembled = PETSC_TRUE; 1758 1759 c->bs = a->bs; 1760 c->bs2 = a->bs2; 1761 c->mbs = a->mbs; 1762 c->nbs = a->nbs; 1763 1764 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1765 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1766 for (i=0; i<mbs; i++) { 1767 c->imax[i] = a->imax[i]; 1768 c->ilen[i] = a->ilen[i]; 1769 } 1770 1771 /* allocate the matrix space */ 1772 c->singlemalloc = PETSC_TRUE; 1773 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1774 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1775 c->j = (int*)(c->a + nz*bs2); 1776 c->i = c->j + nz; 1777 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1778 if (mbs > 0) { 1779 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1780 if (cpvalues == MAT_COPY_VALUES) { 1781 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1782 } else { 1783 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1784 } 1785 } 1786 1787 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1788 c->sorted = a->sorted; 1789 c->roworiented = a->roworiented; 1790 c->nonew = a->nonew; 1791 1792 if (a->diag) { 1793 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1794 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1795 for (i=0; i<mbs; i++) { 1796 c->diag[i] = a->diag[i]; 1797 } 1798 } else c->diag = 0; 1799 c->s_nz = a->s_nz; 1800 c->s_maxnz = a->s_maxnz; 1801 c->solve_work = 0; 1802 C->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1803 c->mult_work = 0; 1804 *B = C; 1805 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1806 PetscFunctionReturn(0); 1807 } 1808 1809 EXTERN_C_BEGIN 1810 #undef __FUNCT__ 1811 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1812 int MatLoad_SeqSBAIJ(PetscViewer viewer,MatType type,Mat *A) 1813 { 1814 Mat_SeqSBAIJ *a; 1815 Mat B; 1816 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1817 int *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount; 1818 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1819 int *masked,nmask,tmp,bs2,ishift; 1820 PetscScalar *aa; 1821 MPI_Comm comm = ((PetscObject)viewer)->comm; 1822 #if defined(PETSC_HAVE_MUMPS) 1823 PetscTruth flag; 1824 #endif 1825 1826 PetscFunctionBegin; 1827 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1828 bs2 = bs*bs; 1829 1830 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1831 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1832 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1833 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1834 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1835 M = header[1]; N = header[2]; nz = header[3]; 1836 1837 if (header[3] < 0) { 1838 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1839 } 1840 1841 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1842 1843 /* 1844 This code adds extra rows to make sure the number of rows is 1845 divisible by the blocksize 1846 */ 1847 mbs = M/bs; 1848 extra_rows = bs - M + bs*(mbs); 1849 if (extra_rows == bs) extra_rows = 0; 1850 else mbs++; 1851 if (extra_rows) { 1852 PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n"); 1853 } 1854 1855 /* read in row lengths */ 1856 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1857 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1858 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1859 1860 /* read in column indices */ 1861 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1862 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1863 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1864 1865 /* loop over row lengths determining block row lengths */ 1866 ierr = PetscMalloc(mbs*sizeof(int),&s_browlengths);CHKERRQ(ierr); 1867 ierr = PetscMemzero(s_browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1868 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1869 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1870 masked = mask + mbs; 1871 rowcount = 0; nzcount = 0; 1872 for (i=0; i<mbs; i++) { 1873 nmask = 0; 1874 for (j=0; j<bs; j++) { 1875 kmax = rowlengths[rowcount]; 1876 for (k=0; k<kmax; k++) { 1877 tmp = jj[nzcount++]/bs; /* block col. index */ 1878 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1879 } 1880 rowcount++; 1881 } 1882 s_browlengths[i] += nmask; 1883 1884 /* zero out the mask elements we set */ 1885 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1886 } 1887 1888 /* create our matrix */ 1889 ierr = MatCreate(comm,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows,&B);CHKERRQ(ierr); 1890 ierr = MatSetType(B,type);CHKERRQ(ierr); 1891 ierr = MatSetSBAIJSetPreallocation(B,bs,0,s_browlengths);CHKERRQ(ierr); 1892 a = (Mat_SeqSBAIJ*)B->data; 1893 1894 /* set matrix "i" values */ 1895 a->i[0] = 0; 1896 for (i=1; i<= mbs; i++) { 1897 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1898 a->ilen[i-1] = s_browlengths[i-1]; 1899 } 1900 a->s_nz = a->i[mbs]; 1901 1902 /* read in nonzero values */ 1903 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1904 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1905 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1906 1907 /* set "a" and "j" values into matrix */ 1908 nzcount = 0; jcount = 0; 1909 for (i=0; i<mbs; i++) { 1910 nzcountb = nzcount; 1911 nmask = 0; 1912 for (j=0; j<bs; j++) { 1913 kmax = rowlengths[i*bs+j]; 1914 for (k=0; k<kmax; k++) { 1915 tmp = jj[nzcount++]/bs; /* block col. index */ 1916 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1917 } 1918 } 1919 /* sort the masked values */ 1920 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1921 1922 /* set "j" values into matrix */ 1923 maskcount = 1; 1924 for (j=0; j<nmask; j++) { 1925 a->j[jcount++] = masked[j]; 1926 mask[masked[j]] = maskcount++; 1927 } 1928 1929 /* set "a" values into matrix */ 1930 ishift = bs2*a->i[i]; 1931 for (j=0; j<bs; j++) { 1932 kmax = rowlengths[i*bs+j]; 1933 for (k=0; k<kmax; k++) { 1934 tmp = jj[nzcountb]/bs ; /* block col. index */ 1935 if (tmp >= i){ 1936 block = mask[tmp] - 1; 1937 point = jj[nzcountb] - bs*tmp; 1938 idx = ishift + bs2*block + j + bs*point; 1939 a->a[idx] = aa[nzcountb]; 1940 } 1941 nzcountb++; 1942 } 1943 } 1944 /* zero out the mask elements we set */ 1945 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1946 } 1947 if (jcount != a->s_nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1948 1949 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1950 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1951 ierr = PetscFree(aa);CHKERRQ(ierr); 1952 ierr = PetscFree(jj);CHKERRQ(ierr); 1953 ierr = PetscFree(mask);CHKERRQ(ierr); 1954 1955 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1956 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1957 #if defined(PETSC_HAVE_MUMPS) 1958 ierr = PetscOptionsHasName(B->prefix,"-mat_sbaij_mumps",&flag);CHKERRQ(ierr); 1959 if (flag) { ierr = MatUseMUMPS_MPIAIJ(B);CHKERRQ(ierr); } 1960 #endif 1961 ierr = MatView_Private(B);CHKERRQ(ierr); 1962 *A = B; 1963 PetscFunctionReturn(0); 1964 } 1965 EXTERN_C_END 1966 1967 #undef __FUNCT__ 1968 #define __FUNCT__ "MatRelax_SeqSBAIJ" 1969 int MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 1970 { 1971 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1972 MatScalar *aa=a->a,*v,*v1; 1973 PetscScalar *x,*b,*t,sum,d; 1974 int m=a->mbs,bs=a->bs,*ai=a->i,*aj=a->j,ierr; 1975 int nz,nz1,*vj,*vj1,i; 1976 1977 PetscFunctionBegin; 1978 its = its*lits; 1979 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 1980 1981 if (bs > 1) 1982 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 1983 1984 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1985 if (xx != bb) { 1986 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1987 } else { 1988 b = x; 1989 } 1990 1991 ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr); 1992 1993 if (flag & SOR_ZERO_INITIAL_GUESS) { 1994 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1995 for (i=0; i<m; i++) 1996 t[i] = b[i]; 1997 1998 for (i=0; i<m; i++){ 1999 d = *(aa + ai[i]); /* diag[i] */ 2000 v = aa + ai[i] + 1; 2001 vj = aj + ai[i] + 1; 2002 nz = ai[i+1] - ai[i] - 1; 2003 x[i] = omega*t[i]/d; 2004 while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */ 2005 PetscLogFlops(2*nz-1); 2006 } 2007 } 2008 2009 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2010 for (i=0; i<m; i++) 2011 t[i] = b[i]; 2012 2013 for (i=0; i<m-1; i++){ /* update rhs */ 2014 v = aa + ai[i] + 1; 2015 vj = aj + ai[i] + 1; 2016 nz = ai[i+1] - ai[i] - 1; 2017 while (nz--) t[*vj++] -= x[i]*(*v++); 2018 PetscLogFlops(2*nz-1); 2019 } 2020 for (i=m-1; i>=0; i--){ 2021 d = *(aa + ai[i]); 2022 v = aa + ai[i] + 1; 2023 vj = aj + ai[i] + 1; 2024 nz = ai[i+1] - ai[i] - 1; 2025 sum = t[i]; 2026 while (nz--) sum -= x[*vj++]*(*v++); 2027 PetscLogFlops(2*nz-1); 2028 x[i] = (1-omega)*x[i] + omega*sum/d; 2029 } 2030 } 2031 its--; 2032 } 2033 2034 while (its--) { 2035 /* 2036 forward sweep: 2037 for i=0,...,m-1: 2038 sum[i] = (b[i] - U(i,:)x )/d[i]; 2039 x[i] = (1-omega)x[i] + omega*sum[i]; 2040 b = b - x[i]*U^T(i,:); 2041 2042 */ 2043 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2044 for (i=0; i<m; i++) 2045 t[i] = b[i]; 2046 2047 for (i=0; i<m; i++){ 2048 d = *(aa + ai[i]); /* diag[i] */ 2049 v = aa + ai[i] + 1; v1=v; 2050 vj = aj + ai[i] + 1; vj1=vj; 2051 nz = ai[i+1] - ai[i] - 1; nz1=nz; 2052 sum = t[i]; 2053 while (nz1--) sum -= (*v1++)*x[*vj1++]; 2054 x[i] = (1-omega)*x[i] + omega*sum/d; 2055 while (nz--) t[*vj++] -= x[i]*(*v++); 2056 PetscLogFlops(4*nz-2); 2057 } 2058 } 2059 2060 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2061 /* 2062 backward sweep: 2063 b = b - x[i]*U^T(i,:), i=0,...,n-2 2064 for i=m-1,...,0: 2065 sum[i] = (b[i] - U(i,:)x )/d[i]; 2066 x[i] = (1-omega)x[i] + omega*sum[i]; 2067 */ 2068 for (i=0; i<m; i++) 2069 t[i] = b[i]; 2070 2071 for (i=0; i<m-1; i++){ /* update rhs */ 2072 v = aa + ai[i] + 1; 2073 vj = aj + ai[i] + 1; 2074 nz = ai[i+1] - ai[i] - 1; 2075 while (nz--) t[*vj++] -= x[i]*(*v++); 2076 PetscLogFlops(2*nz-1); 2077 } 2078 for (i=m-1; i>=0; i--){ 2079 d = *(aa + ai[i]); 2080 v = aa + ai[i] + 1; 2081 vj = aj + ai[i] + 1; 2082 nz = ai[i+1] - ai[i] - 1; 2083 sum = t[i]; 2084 while (nz--) sum -= x[*vj++]*(*v++); 2085 PetscLogFlops(2*nz-1); 2086 x[i] = (1-omega)*x[i] + omega*sum/d; 2087 } 2088 } 2089 } 2090 2091 ierr = PetscFree(t); CHKERRQ(ierr); 2092 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2093 if (bb != xx) { 2094 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2095 } 2096 2097 PetscFunctionReturn(0); 2098 } 2099 2100 2101 2102 2103 2104 2105