1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the SBAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 8 #include "src/inline/spops.h" 9 #include "src/mat/impls/sbaij/seq/sbaij.h" 10 11 #define CHUNKSIZE 10 12 13 /* 14 Checks for missing diagonals 15 */ 16 #undef __FUNCT__ 17 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 18 PetscErrorCode MatMissingDiagonal_SeqSBAIJ(Mat A) 19 { 20 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 21 PetscErrorCode ierr; 22 PetscInt *diag,*jj = a->j,i; 23 24 PetscFunctionBegin; 25 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 26 diag = a->diag; 27 for (i=0; i<a->mbs; i++) { 28 if (jj[diag[i]] != i) SETERRQ1(PETSC_ERR_ARG_CORRUPT,"Matrix is missing diagonal number %D",i); 29 } 30 PetscFunctionReturn(0); 31 } 32 33 #undef __FUNCT__ 34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 35 PetscErrorCode MatMarkDiagonal_SeqSBAIJ(Mat A) 36 { 37 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 38 PetscErrorCode ierr; 39 PetscInt i; 40 41 PetscFunctionBegin; 42 if (!a->diag) { 43 ierr = PetscMalloc(a->mbs*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 44 } 45 for (i=0; i<a->mbs; i++) a->diag[i] = a->i[i]; 46 PetscFunctionReturn(0); 47 } 48 49 #undef __FUNCT__ 50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 51 static PetscErrorCode MatGetRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 52 { 53 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 54 PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap.bs; 55 PetscErrorCode ierr; 56 57 PetscFunctionBegin; 58 *nn = n; 59 if (!ia) PetscFunctionReturn(0); 60 if (!blockcompressed) { 61 /* malloc & create the natural set of indices */ 62 ierr = PetscMalloc2((n+1)*bs,PetscInt,ia,nz*bs,PetscInt,ja);CHKERRQ(ierr); 63 for (i=0; i<n+1; i++) { 64 for (j=0; j<bs; j++) { 65 *ia[i*bs+j] = a->i[i]*bs+j+oshift; 66 } 67 } 68 for (i=0; i<nz; i++) { 69 for (j=0; j<bs; j++) { 70 *ja[i*bs+j] = a->j[i]*bs+j+oshift; 71 } 72 } 73 } else { /* blockcompressed */ 74 if (oshift == 1) { 75 /* temporarily add 1 to i and j indices */ 76 for (i=0; i<nz; i++) a->j[i]++; 77 for (i=0; i<n+1; i++) a->i[i]++; 78 } 79 *ia = a->i; *ja = a->j; 80 } 81 82 PetscFunctionReturn(0); 83 } 84 85 #undef __FUNCT__ 86 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 87 static PetscErrorCode MatRestoreRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 88 { 89 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 90 PetscInt i,n = a->mbs,nz = a->i[n]; 91 PetscErrorCode ierr; 92 93 PetscFunctionBegin; 94 if (!ia) PetscFunctionReturn(0); 95 96 if (!blockcompressed) { 97 ierr = PetscFree2(*ia,*ja);CHKERRQ(ierr); 98 } else if (oshift == 1) { /* blockcompressed */ 99 for (i=0; i<nz; i++) a->j[i]--; 100 for (i=0; i<n+1; i++) a->i[i]--; 101 } 102 103 PetscFunctionReturn(0); 104 } 105 106 #undef __FUNCT__ 107 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 108 PetscErrorCode MatDestroy_SeqSBAIJ(Mat A) 109 { 110 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 111 PetscErrorCode ierr; 112 113 PetscFunctionBegin; 114 #if defined(PETSC_USE_LOG) 115 PetscLogObjectState((PetscObject)A,"Rows=%D, NZ=%D",A->rmap.N,a->nz); 116 #endif 117 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 118 if (a->row) {ierr = ISDestroy(a->row);CHKERRQ(ierr);} 119 if (a->col){ierr = ISDestroy(a->col);CHKERRQ(ierr);} 120 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 121 ierr = PetscFree(a->diag);CHKERRQ(ierr); 122 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 123 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 124 ierr = PetscFree(a->relax_work);CHKERRQ(ierr); 125 ierr = PetscFree(a->solves_work);CHKERRQ(ierr); 126 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 127 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 128 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 129 130 ierr = PetscFree(a->inew);CHKERRQ(ierr); 131 ierr = PetscFree(a);CHKERRQ(ierr); 132 133 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 134 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 135 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 136 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 137 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 138 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqbaij_C","",PETSC_NULL);CHKERRQ(ierr); 139 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 140 PetscFunctionReturn(0); 141 } 142 143 #undef __FUNCT__ 144 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 145 PetscErrorCode MatSetOption_SeqSBAIJ(Mat A,MatOption op) 146 { 147 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 148 PetscErrorCode ierr; 149 150 PetscFunctionBegin; 151 switch (op) { 152 case MAT_ROW_ORIENTED: 153 a->roworiented = PETSC_TRUE; 154 break; 155 case MAT_COLUMN_ORIENTED: 156 a->roworiented = PETSC_FALSE; 157 break; 158 case MAT_COLUMNS_SORTED: 159 a->sorted = PETSC_TRUE; 160 break; 161 case MAT_COLUMNS_UNSORTED: 162 a->sorted = PETSC_FALSE; 163 break; 164 case MAT_KEEP_ZEROED_ROWS: 165 a->keepzeroedrows = PETSC_TRUE; 166 break; 167 case MAT_NO_NEW_NONZERO_LOCATIONS: 168 a->nonew = 1; 169 break; 170 case MAT_NEW_NONZERO_LOCATION_ERR: 171 a->nonew = -1; 172 break; 173 case MAT_NEW_NONZERO_ALLOCATION_ERR: 174 a->nonew = -2; 175 break; 176 case MAT_YES_NEW_NONZERO_LOCATIONS: 177 a->nonew = 0; 178 break; 179 case MAT_ROWS_SORTED: 180 case MAT_ROWS_UNSORTED: 181 case MAT_YES_NEW_DIAGONALS: 182 case MAT_IGNORE_OFF_PROC_ENTRIES: 183 case MAT_USE_HASH_TABLE: 184 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 185 break; 186 case MAT_NO_NEW_DIAGONALS: 187 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 188 case MAT_NOT_SYMMETRIC: 189 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 190 case MAT_HERMITIAN: 191 SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 192 case MAT_SYMMETRIC: 193 case MAT_STRUCTURALLY_SYMMETRIC: 194 case MAT_NOT_HERMITIAN: 195 case MAT_SYMMETRY_ETERNAL: 196 case MAT_NOT_SYMMETRY_ETERNAL: 197 ierr = PetscInfo1(A,"Option %s not relevent\n",MatOptions[op]);CHKERRQ(ierr); 198 break; 199 case MAT_IGNORE_LOWER_TRIANGULAR: 200 a->ignore_ltriangular = PETSC_TRUE; 201 break; 202 case MAT_ERROR_LOWER_TRIANGULAR: 203 a->ignore_ltriangular = PETSC_FALSE; 204 break; 205 case MAT_GETROW_UPPERTRIANGULAR: 206 a->getrow_utriangular = PETSC_TRUE; 207 break; 208 default: 209 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 210 } 211 PetscFunctionReturn(0); 212 } 213 214 #undef __FUNCT__ 215 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 216 PetscErrorCode MatGetRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **v) 217 { 218 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 219 PetscErrorCode ierr; 220 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2; 221 MatScalar *aa,*aa_i; 222 PetscScalar *v_i; 223 224 PetscFunctionBegin; 225 if (A && !a->getrow_utriangular) SETERRQ(PETSC_ERR_SUP,"MatGetRow is not supported for SBAIJ matrix format. Getting the upper triangular part of row, run with -mat_getrow_uppertriangular, call MatSetOption(mat,MAT_GETROW_UPPERTRIANGULAR) or MatGetRowUpperTriangular()"); 226 /* Get the upper triangular part of the row */ 227 bs = A->rmap.bs; 228 ai = a->i; 229 aj = a->j; 230 aa = a->a; 231 bs2 = a->bs2; 232 233 if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %D out of range", row); 234 235 bn = row/bs; /* Block number */ 236 bp = row % bs; /* Block position */ 237 M = ai[bn+1] - ai[bn]; 238 *ncols = bs*M; 239 240 if (v) { 241 *v = 0; 242 if (*ncols) { 243 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 244 for (i=0; i<M; i++) { /* for each block in the block row */ 245 v_i = *v + i*bs; 246 aa_i = aa + bs2*(ai[bn] + i); 247 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 248 } 249 } 250 } 251 252 if (cols) { 253 *cols = 0; 254 if (*ncols) { 255 ierr = PetscMalloc((*ncols+row)*sizeof(PetscInt),cols);CHKERRQ(ierr); 256 for (i=0; i<M; i++) { /* for each block in the block row */ 257 cols_i = *cols + i*bs; 258 itmp = bs*aj[ai[bn] + i]; 259 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 260 } 261 } 262 } 263 264 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 265 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 266 #ifdef column_search 267 v_i = *v + M*bs; 268 cols_i = *cols + M*bs; 269 for (i=0; i<bn; i++){ /* for each block row */ 270 M = ai[i+1] - ai[i]; 271 for (j=0; j<M; j++){ 272 itmp = aj[ai[i] + j]; /* block column value */ 273 if (itmp == bn){ 274 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 275 for (k=0; k<bs; k++) { 276 *cols_i++ = i*bs+k; 277 *v_i++ = aa_i[k]; 278 } 279 *ncols += bs; 280 break; 281 } 282 } 283 } 284 #endif 285 PetscFunctionReturn(0); 286 } 287 288 #undef __FUNCT__ 289 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 290 PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 291 { 292 PetscErrorCode ierr; 293 294 PetscFunctionBegin; 295 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 296 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 297 PetscFunctionReturn(0); 298 } 299 300 #undef __FUNCT__ 301 #define __FUNCT__ "MatGetRowUpperTriangular_SeqSBAIJ" 302 PetscErrorCode MatGetRowUpperTriangular_SeqSBAIJ(Mat A) 303 { 304 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 305 306 PetscFunctionBegin; 307 a->getrow_utriangular = PETSC_TRUE; 308 PetscFunctionReturn(0); 309 } 310 #undef __FUNCT__ 311 #define __FUNCT__ "MatRestoreRowUpperTriangular_SeqSBAIJ" 312 PetscErrorCode MatRestoreRowUpperTriangular_SeqSBAIJ(Mat A) 313 { 314 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 315 316 PetscFunctionBegin; 317 a->getrow_utriangular = PETSC_FALSE; 318 PetscFunctionReturn(0); 319 } 320 321 #undef __FUNCT__ 322 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 323 PetscErrorCode MatTranspose_SeqSBAIJ(Mat A,Mat *B) 324 { 325 PetscErrorCode ierr; 326 PetscFunctionBegin; 327 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 328 PetscFunctionReturn(0); 329 } 330 331 #undef __FUNCT__ 332 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 333 static PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 334 { 335 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 336 PetscErrorCode ierr; 337 PetscInt i,j,bs = A->rmap.bs,k,l,bs2=a->bs2; 338 const char *name; 339 PetscViewerFormat format; 340 341 PetscFunctionBegin; 342 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 343 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 344 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 345 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 346 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 347 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 348 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 349 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 350 for (i=0; i<a->mbs; i++) { 351 for (j=0; j<bs; j++) { 352 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 353 for (k=a->i[i]; k<a->i[i+1]; k++) { 354 for (l=0; l<bs; l++) { 355 #if defined(PETSC_USE_COMPLEX) 356 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 357 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 358 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 359 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 360 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 361 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 362 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 363 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 364 } 365 #else 366 if (a->a[bs2*k + l*bs + j] != 0.0) { 367 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 368 } 369 #endif 370 } 371 } 372 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 373 } 374 } 375 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 376 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 377 PetscFunctionReturn(0); 378 } else { 379 if (A->factor && bs>1){ 380 ierr = PetscPrintf(PETSC_COMM_SELF,"Warning: matrix is factored. MatView_SeqSBAIJ_ASCII() may not display complete or logically correct entries!\n");CHKERRQ(ierr); 381 } 382 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 383 for (i=0; i<a->mbs; i++) { 384 for (j=0; j<bs; j++) { 385 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 386 for (k=a->i[i]; k<a->i[i+1]; k++) { 387 for (l=0; l<bs; l++) { 388 #if defined(PETSC_USE_COMPLEX) 389 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 390 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 391 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 392 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 393 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 394 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 395 } else { 396 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 397 } 398 #else 399 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 400 #endif 401 } 402 } 403 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 404 } 405 } 406 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 407 } 408 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 409 PetscFunctionReturn(0); 410 } 411 412 #undef __FUNCT__ 413 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 414 static PetscErrorCode MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 415 { 416 Mat A = (Mat) Aa; 417 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 418 PetscErrorCode ierr; 419 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2; 420 PetscMPIInt rank; 421 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 422 MatScalar *aa; 423 MPI_Comm comm; 424 PetscViewer viewer; 425 426 PetscFunctionBegin; 427 /* 428 This is nasty. If this is called from an originally parallel matrix 429 then all processes call this,but only the first has the matrix so the 430 rest should return immediately. 431 */ 432 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 433 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 434 if (rank) PetscFunctionReturn(0); 435 436 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 437 438 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 439 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 440 441 /* loop over matrix elements drawing boxes */ 442 color = PETSC_DRAW_BLUE; 443 for (i=0,row=0; i<mbs; i++,row+=bs) { 444 for (j=a->i[i]; j<a->i[i+1]; j++) { 445 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 446 x_l = a->j[j]*bs; x_r = x_l + 1.0; 447 aa = a->a + j*bs2; 448 for (k=0; k<bs; k++) { 449 for (l=0; l<bs; l++) { 450 if (PetscRealPart(*aa++) >= 0.) continue; 451 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 452 } 453 } 454 } 455 } 456 color = PETSC_DRAW_CYAN; 457 for (i=0,row=0; i<mbs; i++,row+=bs) { 458 for (j=a->i[i]; j<a->i[i+1]; j++) { 459 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 460 x_l = a->j[j]*bs; x_r = x_l + 1.0; 461 aa = a->a + j*bs2; 462 for (k=0; k<bs; k++) { 463 for (l=0; l<bs; l++) { 464 if (PetscRealPart(*aa++) != 0.) continue; 465 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 466 } 467 } 468 } 469 } 470 471 color = PETSC_DRAW_RED; 472 for (i=0,row=0; i<mbs; i++,row+=bs) { 473 for (j=a->i[i]; j<a->i[i+1]; j++) { 474 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 475 x_l = a->j[j]*bs; x_r = x_l + 1.0; 476 aa = a->a + j*bs2; 477 for (k=0; k<bs; k++) { 478 for (l=0; l<bs; l++) { 479 if (PetscRealPart(*aa++) <= 0.) continue; 480 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 481 } 482 } 483 } 484 } 485 PetscFunctionReturn(0); 486 } 487 488 #undef __FUNCT__ 489 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 490 static PetscErrorCode MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 491 { 492 PetscErrorCode ierr; 493 PetscReal xl,yl,xr,yr,w,h; 494 PetscDraw draw; 495 PetscTruth isnull; 496 497 PetscFunctionBegin; 498 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 499 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 500 501 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 502 xr = A->rmap.N; yr = A->rmap.N; h = yr/10.0; w = xr/10.0; 503 xr += w; yr += h; xl = -w; yl = -h; 504 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 505 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 506 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 507 PetscFunctionReturn(0); 508 } 509 510 #undef __FUNCT__ 511 #define __FUNCT__ "MatView_SeqSBAIJ" 512 PetscErrorCode MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 513 { 514 PetscErrorCode ierr; 515 PetscTruth iascii,isdraw; 516 517 PetscFunctionBegin; 518 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 519 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 520 if (iascii){ 521 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 522 } else if (isdraw) { 523 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 524 } else { 525 Mat B; 526 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 527 ierr = MatView(B,viewer);CHKERRQ(ierr); 528 ierr = MatDestroy(B);CHKERRQ(ierr); 529 } 530 PetscFunctionReturn(0); 531 } 532 533 534 #undef __FUNCT__ 535 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 536 PetscErrorCode MatGetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 537 { 538 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 539 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 540 PetscInt *ai = a->i,*ailen = a->ilen; 541 PetscInt brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2; 542 MatScalar *ap,*aa = a->a,zero = 0.0; 543 544 PetscFunctionBegin; 545 for (k=0; k<m; k++) { /* loop over rows */ 546 row = im[k]; brow = row/bs; 547 if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",row); 548 if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1); 549 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 550 nrow = ailen[brow]; 551 for (l=0; l<n; l++) { /* loop over columns */ 552 if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",in[l]); 553 if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1); 554 col = in[l] ; 555 bcol = col/bs; 556 cidx = col%bs; 557 ridx = row%bs; 558 high = nrow; 559 low = 0; /* assume unsorted */ 560 while (high-low > 5) { 561 t = (low+high)/2; 562 if (rp[t] > bcol) high = t; 563 else low = t; 564 } 565 for (i=low; i<high; i++) { 566 if (rp[i] > bcol) break; 567 if (rp[i] == bcol) { 568 *v++ = ap[bs2*i+bs*cidx+ridx]; 569 goto finished; 570 } 571 } 572 *v++ = zero; 573 finished:; 574 } 575 } 576 PetscFunctionReturn(0); 577 } 578 579 580 #undef __FUNCT__ 581 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 582 PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 583 { 584 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 585 PetscErrorCode ierr; 586 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 587 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 588 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval; 589 PetscTruth roworiented=a->roworiented; 590 const MatScalar *value = v; 591 MatScalar *ap,*aa = a->a,*bap; 592 593 PetscFunctionBegin; 594 if (roworiented) { 595 stepval = (n-1)*bs; 596 } else { 597 stepval = (m-1)*bs; 598 } 599 for (k=0; k<m; k++) { /* loop over added rows */ 600 row = im[k]; 601 if (row < 0) continue; 602 #if defined(PETSC_USE_DEBUG) 603 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 604 #endif 605 rp = aj + ai[row]; 606 ap = aa + bs2*ai[row]; 607 rmax = imax[row]; 608 nrow = ailen[row]; 609 low = 0; 610 high = nrow; 611 for (l=0; l<n; l++) { /* loop over added columns */ 612 if (in[l] < 0) continue; 613 col = in[l]; 614 #if defined(PETSC_USE_DEBUG) 615 if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",col,a->nbs-1); 616 #endif 617 if (col < row) continue; /* ignore lower triangular block */ 618 if (roworiented) { 619 value = v + k*(stepval+bs)*bs + l*bs; 620 } else { 621 value = v + l*(stepval+bs)*bs + k*bs; 622 } 623 if (col <= lastcol) low = 0; else high = nrow; 624 lastcol = col; 625 while (high-low > 7) { 626 t = (low+high)/2; 627 if (rp[t] > col) high = t; 628 else low = t; 629 } 630 for (i=low; i<high; i++) { 631 if (rp[i] > col) break; 632 if (rp[i] == col) { 633 bap = ap + bs2*i; 634 if (roworiented) { 635 if (is == ADD_VALUES) { 636 for (ii=0; ii<bs; ii++,value+=stepval) { 637 for (jj=ii; jj<bs2; jj+=bs) { 638 bap[jj] += *value++; 639 } 640 } 641 } else { 642 for (ii=0; ii<bs; ii++,value+=stepval) { 643 for (jj=ii; jj<bs2; jj+=bs) { 644 bap[jj] = *value++; 645 } 646 } 647 } 648 } else { 649 if (is == ADD_VALUES) { 650 for (ii=0; ii<bs; ii++,value+=stepval) { 651 for (jj=0; jj<bs; jj++) { 652 *bap++ += *value++; 653 } 654 } 655 } else { 656 for (ii=0; ii<bs; ii++,value+=stepval) { 657 for (jj=0; jj<bs; jj++) { 658 *bap++ = *value++; 659 } 660 } 661 } 662 } 663 goto noinsert2; 664 } 665 } 666 if (nonew == 1) goto noinsert2; 667 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 668 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 669 N = nrow++ - 1; high++; 670 /* shift up all the later entries in this row */ 671 for (ii=N; ii>=i; ii--) { 672 rp[ii+1] = rp[ii]; 673 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 674 } 675 if (N >= i) { 676 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 677 } 678 rp[i] = col; 679 bap = ap + bs2*i; 680 if (roworiented) { 681 for (ii=0; ii<bs; ii++,value+=stepval) { 682 for (jj=ii; jj<bs2; jj+=bs) { 683 bap[jj] = *value++; 684 } 685 } 686 } else { 687 for (ii=0; ii<bs; ii++,value+=stepval) { 688 for (jj=0; jj<bs; jj++) { 689 *bap++ = *value++; 690 } 691 } 692 } 693 noinsert2:; 694 low = i; 695 } 696 ailen[row] = nrow; 697 } 698 PetscFunctionReturn(0); 699 } 700 701 #undef __FUNCT__ 702 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 703 PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 704 { 705 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 706 PetscErrorCode ierr; 707 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 708 PetscInt m = A->rmap.N,*ip,N,*ailen = a->ilen; 709 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 710 MatScalar *aa = a->a,*ap; 711 712 PetscFunctionBegin; 713 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 714 715 if (m) rmax = ailen[0]; 716 for (i=1; i<mbs; i++) { 717 /* move each row back by the amount of empty slots (fshift) before it*/ 718 fshift += imax[i-1] - ailen[i-1]; 719 rmax = PetscMax(rmax,ailen[i]); 720 if (fshift) { 721 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 722 N = ailen[i]; 723 for (j=0; j<N; j++) { 724 ip[j-fshift] = ip[j]; 725 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 726 } 727 } 728 ai[i] = ai[i-1] + ailen[i-1]; 729 } 730 if (mbs) { 731 fshift += imax[mbs-1] - ailen[mbs-1]; 732 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 733 } 734 /* reset ilen and imax for each row */ 735 for (i=0; i<mbs; i++) { 736 ailen[i] = imax[i] = ai[i+1] - ai[i]; 737 } 738 a->nz = ai[mbs]; 739 740 /* diagonals may have moved, reset it */ 741 if (a->diag) { 742 ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 743 } 744 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->rmap.N,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 745 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 746 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 747 a->reallocs = 0; 748 A->info.nz_unneeded = (PetscReal)fshift*bs2; 749 PetscFunctionReturn(0); 750 } 751 752 /* 753 This function returns an array of flags which indicate the locations of contiguous 754 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 755 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 756 Assume: sizes should be long enough to hold all the values. 757 */ 758 #undef __FUNCT__ 759 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 760 PetscErrorCode MatZeroRows_SeqSBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 761 { 762 PetscInt i,j,k,row; 763 PetscTruth flg; 764 765 PetscFunctionBegin; 766 for (i=0,j=0; i<n; j++) { 767 row = idx[i]; 768 if (row%bs!=0) { /* Not the begining of a block */ 769 sizes[j] = 1; 770 i++; 771 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 772 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 773 i++; 774 } else { /* Begining of the block, so check if the complete block exists */ 775 flg = PETSC_TRUE; 776 for (k=1; k<bs; k++) { 777 if (row+k != idx[i+k]) { /* break in the block */ 778 flg = PETSC_FALSE; 779 break; 780 } 781 } 782 if (flg) { /* No break in the bs */ 783 sizes[j] = bs; 784 i+= bs; 785 } else { 786 sizes[j] = 1; 787 i++; 788 } 789 } 790 } 791 *bs_max = j; 792 PetscFunctionReturn(0); 793 } 794 795 796 /* Only add/insert a(i,j) with i<=j (blocks). 797 Any a(i,j) with i>j input by user is ingored. 798 */ 799 800 #undef __FUNCT__ 801 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 802 PetscErrorCode MatSetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 803 { 804 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 805 PetscErrorCode ierr; 806 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 807 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 808 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol; 809 PetscInt ridx,cidx,bs2=a->bs2; 810 MatScalar *ap,value,*aa=a->a,*bap; 811 812 PetscFunctionBegin; 813 for (k=0; k<m; k++) { /* loop over added rows */ 814 row = im[k]; /* row number */ 815 brow = row/bs; /* block row number */ 816 if (row < 0) continue; 817 #if defined(PETSC_USE_DEBUG) 818 if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1); 819 #endif 820 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 821 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 822 rmax = imax[brow]; /* maximum space allocated for this row */ 823 nrow = ailen[brow]; /* actual length of this row */ 824 low = 0; 825 826 for (l=0; l<n; l++) { /* loop over added columns */ 827 if (in[l] < 0) continue; 828 #if defined(PETSC_USE_DEBUG) 829 if (in[l] >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->rmap.N-1); 830 #endif 831 col = in[l]; 832 bcol = col/bs; /* block col number */ 833 834 if (brow > bcol) { 835 if (a->ignore_ltriangular){ 836 continue; /* ignore lower triangular values */ 837 } else { 838 SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR)"); 839 } 840 } 841 842 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 843 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 844 /* element value a(k,l) */ 845 if (roworiented) { 846 value = v[l + k*n]; 847 } else { 848 value = v[k + l*m]; 849 } 850 851 /* move pointer bap to a(k,l) quickly and add/insert value */ 852 if (col <= lastcol) low = 0; high = nrow; 853 lastcol = col; 854 while (high-low > 7) { 855 t = (low+high)/2; 856 if (rp[t] > bcol) high = t; 857 else low = t; 858 } 859 for (i=low; i<high; i++) { 860 if (rp[i] > bcol) break; 861 if (rp[i] == bcol) { 862 bap = ap + bs2*i + bs*cidx + ridx; 863 if (is == ADD_VALUES) *bap += value; 864 else *bap = value; 865 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 866 if (brow == bcol && ridx < cidx){ 867 bap = ap + bs2*i + bs*ridx + cidx; 868 if (is == ADD_VALUES) *bap += value; 869 else *bap = value; 870 } 871 goto noinsert1; 872 } 873 } 874 875 if (nonew == 1) goto noinsert1; 876 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 877 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 878 879 N = nrow++ - 1; high++; 880 /* shift up all the later entries in this row */ 881 for (ii=N; ii>=i; ii--) { 882 rp[ii+1] = rp[ii]; 883 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 884 } 885 if (N>=i) { 886 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 887 } 888 rp[i] = bcol; 889 ap[bs2*i + bs*cidx + ridx] = value; 890 noinsert1:; 891 low = i; 892 } 893 } /* end of loop over added columns */ 894 ailen[brow] = nrow; 895 } /* end of loop over added rows */ 896 PetscFunctionReturn(0); 897 } 898 899 #undef __FUNCT__ 900 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 901 PetscErrorCode MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info) 902 { 903 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 904 Mat outA; 905 PetscErrorCode ierr; 906 PetscTruth row_identity; 907 908 PetscFunctionBegin; 909 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels=0 is supported for in-place icc"); 910 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 911 if (!row_identity) SETERRQ(PETSC_ERR_SUP,"Matrix reordering is not supported"); 912 if (inA->rmap.bs != 1) SETERRQ1(PETSC_ERR_SUP,"Matrix block size %D is not supported",inA->rmap.bs); /* Need to replace MatCholeskyFactorSymbolic_SeqSBAIJ_MSR()! */ 913 914 outA = inA; 915 inA->factor = FACTOR_CHOLESKY; 916 917 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 918 /* 919 Blocksize < 8 have a special faster factorization/solver 920 for ICC(0) factorization with natural ordering 921 */ 922 switch (inA->rmap.bs){ /* Note: row_identity = PETSC_TRUE! */ 923 case 1: 924 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 925 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 926 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 927 inA->ops->solves = MatSolves_SeqSBAIJ_1; 928 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 929 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 930 ierr = PetscInfo(inA,"Using special in-place natural ordering solvetrans BS=1\n");CHKERRQ(ierr); 931 break; 932 case 2: 933 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 934 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 935 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 936 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_2_NaturalOrdering; 937 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering; 938 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=2\n");CHKERRQ(ierr); 939 break; 940 case 3: 941 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 942 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 943 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 944 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_3_NaturalOrdering; 945 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering; 946 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=3\n");CHKERRQ(ierr); 947 break; 948 case 4: 949 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 950 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 951 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 952 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_4_NaturalOrdering; 953 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering; 954 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=4\n");CHKERRQ(ierr); 955 break; 956 case 5: 957 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 958 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 959 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 960 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_5_NaturalOrdering; 961 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering; 962 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=5\n");CHKERRQ(ierr); 963 break; 964 case 6: 965 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 966 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 967 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 968 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_6_NaturalOrdering; 969 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering; 970 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=6\n");CHKERRQ(ierr); 971 break; 972 case 7: 973 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 974 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 975 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 976 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_7_NaturalOrdering; 977 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering; 978 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=7\n");CHKERRQ(ierr); 979 break; 980 default: 981 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering; 982 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_N_NaturalOrdering; 983 inA->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering; 984 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_N_NaturalOrdering; 985 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering; 986 break; 987 } 988 989 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 990 if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); } 991 a->row = row; 992 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 993 if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); } 994 a->col = row; 995 996 /* Create the invert permutation so that it can be used in MatCholeskyFactorNumeric() */ 997 if (a->icol) {ierr = ISInvertPermutation(row,PETSC_DECIDE, &a->icol);CHKERRQ(ierr);} 998 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 999 1000 if (!a->solve_work) { 1001 ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1002 ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr); 1003 } 1004 1005 ierr = MatCholeskyFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 1006 PetscFunctionReturn(0); 1007 } 1008 1009 EXTERN_C_BEGIN 1010 #undef __FUNCT__ 1011 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1012 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,PetscInt *indices) 1013 { 1014 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1015 PetscInt i,nz,n; 1016 1017 PetscFunctionBegin; 1018 nz = baij->maxnz; 1019 n = mat->cmap.n; 1020 for (i=0; i<nz; i++) { 1021 baij->j[i] = indices[i]; 1022 } 1023 baij->nz = nz; 1024 for (i=0; i<n; i++) { 1025 baij->ilen[i] = baij->imax[i]; 1026 } 1027 PetscFunctionReturn(0); 1028 } 1029 EXTERN_C_END 1030 1031 #undef __FUNCT__ 1032 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1033 /*@ 1034 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1035 in the matrix. 1036 1037 Input Parameters: 1038 + mat - the SeqSBAIJ matrix 1039 - indices - the column indices 1040 1041 Level: advanced 1042 1043 Notes: 1044 This can be called if you have precomputed the nonzero structure of the 1045 matrix and want to provide it to the matrix object to improve the performance 1046 of the MatSetValues() operation. 1047 1048 You MUST have set the correct numbers of nonzeros per row in the call to 1049 MatCreateSeqSBAIJ(), and the columns indices MUST be sorted. 1050 1051 MUST be called before any calls to MatSetValues() 1052 1053 .seealso: MatCreateSeqSBAIJ 1054 @*/ 1055 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices(Mat mat,PetscInt *indices) 1056 { 1057 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 1058 1059 PetscFunctionBegin; 1060 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1061 PetscValidPointer(indices,2); 1062 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1063 if (f) { 1064 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1065 } else { 1066 SETERRQ(PETSC_ERR_SUP,"Wrong type of matrix to set column indices"); 1067 } 1068 PetscFunctionReturn(0); 1069 } 1070 1071 #undef __FUNCT__ 1072 #define __FUNCT__ "MatCopy_SeqSBAIJ" 1073 PetscErrorCode MatCopy_SeqSBAIJ(Mat A,Mat B,MatStructure str) 1074 { 1075 PetscErrorCode ierr; 1076 1077 PetscFunctionBegin; 1078 /* If the two matrices have the same copy implementation, use fast copy. */ 1079 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 1080 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1081 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1082 1083 if (a->i[A->rmap.N] != b->i[B->rmap.N]) { 1084 SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 1085 } 1086 ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr); 1087 } else { 1088 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1089 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1090 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1091 } 1092 PetscFunctionReturn(0); 1093 } 1094 1095 #undef __FUNCT__ 1096 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1097 PetscErrorCode MatSetUpPreallocation_SeqSBAIJ(Mat A) 1098 { 1099 PetscErrorCode ierr; 1100 1101 PetscFunctionBegin; 1102 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 1103 PetscFunctionReturn(0); 1104 } 1105 1106 #undef __FUNCT__ 1107 #define __FUNCT__ "MatGetArray_SeqSBAIJ" 1108 PetscErrorCode MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1109 { 1110 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1111 PetscFunctionBegin; 1112 *array = a->a; 1113 PetscFunctionReturn(0); 1114 } 1115 1116 #undef __FUNCT__ 1117 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ" 1118 PetscErrorCode MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1119 { 1120 PetscFunctionBegin; 1121 PetscFunctionReturn(0); 1122 } 1123 1124 #include "petscblaslapack.h" 1125 #undef __FUNCT__ 1126 #define __FUNCT__ "MatAXPY_SeqSBAIJ" 1127 PetscErrorCode MatAXPY_SeqSBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1128 { 1129 Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data; 1130 PetscErrorCode ierr; 1131 PetscInt i,bs=Y->rmap.bs,bs2,j; 1132 PetscBLASInt bnz = (PetscBLASInt)x->nz,one = 1; 1133 1134 PetscFunctionBegin; 1135 if (str == SAME_NONZERO_PATTERN) { 1136 PetscScalar alpha = a; 1137 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1138 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1139 if (y->xtoy && y->XtoY != X) { 1140 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1141 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1142 } 1143 if (!y->xtoy) { /* get xtoy */ 1144 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1145 y->XtoY = X; 1146 } 1147 bs2 = bs*bs; 1148 for (i=0; i<x->nz; i++) { 1149 j = 0; 1150 while (j < bs2){ 1151 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 1152 j++; 1153 } 1154 } 1155 ierr = PetscInfo3(0,"ratio of nnz_s(X)/nnz_s(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 1156 } else { 1157 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1158 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1159 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1160 } 1161 PetscFunctionReturn(0); 1162 } 1163 1164 #undef __FUNCT__ 1165 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ" 1166 PetscErrorCode MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg) 1167 { 1168 PetscFunctionBegin; 1169 *flg = PETSC_TRUE; 1170 PetscFunctionReturn(0); 1171 } 1172 1173 #undef __FUNCT__ 1174 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ" 1175 PetscErrorCode MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg) 1176 { 1177 PetscFunctionBegin; 1178 *flg = PETSC_TRUE; 1179 PetscFunctionReturn(0); 1180 } 1181 1182 #undef __FUNCT__ 1183 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ" 1184 PetscErrorCode MatIsHermitian_SeqSBAIJ(Mat A,PetscTruth *flg) 1185 { 1186 PetscFunctionBegin; 1187 *flg = PETSC_FALSE; 1188 PetscFunctionReturn(0); 1189 } 1190 1191 #undef __FUNCT__ 1192 #define __FUNCT__ "MatRealPart_SeqSBAIJ" 1193 PetscErrorCode MatRealPart_SeqSBAIJ(Mat A) 1194 { 1195 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1196 PetscInt i,nz = a->bs2*a->i[a->mbs]; 1197 PetscScalar *aa = a->a; 1198 1199 PetscFunctionBegin; 1200 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 1201 PetscFunctionReturn(0); 1202 } 1203 1204 #undef __FUNCT__ 1205 #define __FUNCT__ "MatImaginaryPart_SeqSBAIJ" 1206 PetscErrorCode MatImaginaryPart_SeqSBAIJ(Mat A) 1207 { 1208 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1209 PetscInt i,nz = a->bs2*a->i[a->mbs]; 1210 PetscScalar *aa = a->a; 1211 1212 PetscFunctionBegin; 1213 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 1214 PetscFunctionReturn(0); 1215 } 1216 1217 /* -------------------------------------------------------------------*/ 1218 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1219 MatGetRow_SeqSBAIJ, 1220 MatRestoreRow_SeqSBAIJ, 1221 MatMult_SeqSBAIJ_N, 1222 /* 4*/ MatMultAdd_SeqSBAIJ_N, 1223 MatMult_SeqSBAIJ_N, /* transpose versions are same as non-transpose versions */ 1224 MatMultAdd_SeqSBAIJ_N, 1225 MatSolve_SeqSBAIJ_N, 1226 0, 1227 0, 1228 /*10*/ 0, 1229 0, 1230 MatCholeskyFactor_SeqSBAIJ, 1231 MatRelax_SeqSBAIJ, 1232 MatTranspose_SeqSBAIJ, 1233 /*15*/ MatGetInfo_SeqSBAIJ, 1234 MatEqual_SeqSBAIJ, 1235 MatGetDiagonal_SeqSBAIJ, 1236 MatDiagonalScale_SeqSBAIJ, 1237 MatNorm_SeqSBAIJ, 1238 /*20*/ 0, 1239 MatAssemblyEnd_SeqSBAIJ, 1240 0, 1241 MatSetOption_SeqSBAIJ, 1242 MatZeroEntries_SeqSBAIJ, 1243 /*25*/ 0, 1244 0, 1245 0, 1246 MatCholeskyFactorSymbolic_SeqSBAIJ, 1247 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1248 /*30*/ MatSetUpPreallocation_SeqSBAIJ, 1249 0, 1250 MatICCFactorSymbolic_SeqSBAIJ, 1251 MatGetArray_SeqSBAIJ, 1252 MatRestoreArray_SeqSBAIJ, 1253 /*35*/ MatDuplicate_SeqSBAIJ, 1254 MatForwardSolve_SeqSBAIJ_N, 1255 MatBackwardSolve_SeqSBAIJ_N, 1256 0, 1257 MatICCFactor_SeqSBAIJ, 1258 /*40*/ MatAXPY_SeqSBAIJ, 1259 MatGetSubMatrices_SeqSBAIJ, 1260 MatIncreaseOverlap_SeqSBAIJ, 1261 MatGetValues_SeqSBAIJ, 1262 MatCopy_SeqSBAIJ, 1263 /*45*/ 0, 1264 MatScale_SeqSBAIJ, 1265 0, 1266 0, 1267 0, 1268 /*50*/ 0, 1269 MatGetRowIJ_SeqSBAIJ, 1270 MatRestoreRowIJ_SeqSBAIJ, 1271 0, 1272 0, 1273 /*55*/ 0, 1274 0, 1275 0, 1276 0, 1277 MatSetValuesBlocked_SeqSBAIJ, 1278 /*60*/ MatGetSubMatrix_SeqSBAIJ, 1279 0, 1280 0, 1281 0, 1282 0, 1283 /*65*/ 0, 1284 0, 1285 0, 1286 0, 1287 0, 1288 /*70*/ MatGetRowMaxAbs_SeqSBAIJ, 1289 0, 1290 0, 1291 0, 1292 0, 1293 /*75*/ 0, 1294 0, 1295 0, 1296 0, 1297 0, 1298 /*80*/ 0, 1299 0, 1300 0, 1301 #if !defined(PETSC_USE_COMPLEX) 1302 MatGetInertia_SeqSBAIJ, 1303 #else 1304 0, 1305 #endif 1306 MatLoad_SeqSBAIJ, 1307 /*85*/ MatIsSymmetric_SeqSBAIJ, 1308 MatIsHermitian_SeqSBAIJ, 1309 MatIsStructurallySymmetric_SeqSBAIJ, 1310 0, 1311 0, 1312 /*90*/ 0, 1313 0, 1314 0, 1315 0, 1316 0, 1317 /*95*/ 0, 1318 0, 1319 0, 1320 0, 1321 0, 1322 /*100*/0, 1323 0, 1324 0, 1325 0, 1326 0, 1327 /*105*/0, 1328 MatRealPart_SeqSBAIJ, 1329 MatImaginaryPart_SeqSBAIJ, 1330 MatGetRowUpperTriangular_SeqSBAIJ, 1331 MatRestoreRowUpperTriangular_SeqSBAIJ 1332 }; 1333 1334 EXTERN_C_BEGIN 1335 #undef __FUNCT__ 1336 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1337 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqSBAIJ(Mat mat) 1338 { 1339 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1340 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 1341 PetscErrorCode ierr; 1342 1343 PetscFunctionBegin; 1344 if (aij->nonew != 1) { 1345 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1346 } 1347 1348 /* allocate space for values if not already there */ 1349 if (!aij->saved_values) { 1350 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1351 } 1352 1353 /* copy values over */ 1354 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1355 PetscFunctionReturn(0); 1356 } 1357 EXTERN_C_END 1358 1359 EXTERN_C_BEGIN 1360 #undef __FUNCT__ 1361 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1362 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqSBAIJ(Mat mat) 1363 { 1364 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1365 PetscErrorCode ierr; 1366 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 1367 1368 PetscFunctionBegin; 1369 if (aij->nonew != 1) { 1370 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1371 } 1372 if (!aij->saved_values) { 1373 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 1374 } 1375 1376 /* copy values over */ 1377 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1378 PetscFunctionReturn(0); 1379 } 1380 EXTERN_C_END 1381 1382 EXTERN_C_BEGIN 1383 #undef __FUNCT__ 1384 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ" 1385 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 1386 { 1387 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1388 PetscErrorCode ierr; 1389 PetscInt i,mbs,bs2; 1390 PetscTruth skipallocation = PETSC_FALSE,flg; 1391 1392 PetscFunctionBegin; 1393 B->preallocated = PETSC_TRUE; 1394 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1395 B->rmap.bs = B->cmap.bs = bs; 1396 ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 1397 ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 1398 1399 mbs = B->rmap.N/bs; 1400 bs2 = bs*bs; 1401 1402 if (mbs*bs != B->rmap.N) { 1403 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1404 } 1405 1406 if (nz == MAT_SKIP_ALLOCATION) { 1407 skipallocation = PETSC_TRUE; 1408 nz = 0; 1409 } 1410 1411 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1412 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 1413 if (nnz) { 1414 for (i=0; i<mbs; i++) { 1415 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 1416 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],mbs); 1417 } 1418 } 1419 1420 ierr = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1421 if (!flg) { 1422 switch (bs) { 1423 case 1: 1424 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1425 B->ops->solve = MatSolve_SeqSBAIJ_1; 1426 B->ops->solves = MatSolves_SeqSBAIJ_1; 1427 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 1428 B->ops->mult = MatMult_SeqSBAIJ_1; 1429 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1430 B->ops->multtranspose = MatMult_SeqSBAIJ_1; 1431 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_1; 1432 break; 1433 case 2: 1434 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1435 B->ops->solve = MatSolve_SeqSBAIJ_2; 1436 B->ops->solvetranspose = MatSolve_SeqSBAIJ_2; 1437 B->ops->mult = MatMult_SeqSBAIJ_2; 1438 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1439 B->ops->multtranspose = MatMult_SeqSBAIJ_2; 1440 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_2; 1441 break; 1442 case 3: 1443 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1444 B->ops->solve = MatSolve_SeqSBAIJ_3; 1445 B->ops->solvetranspose = MatSolve_SeqSBAIJ_3; 1446 B->ops->mult = MatMult_SeqSBAIJ_3; 1447 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1448 B->ops->multtranspose = MatMult_SeqSBAIJ_3; 1449 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_3; 1450 break; 1451 case 4: 1452 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1453 B->ops->solve = MatSolve_SeqSBAIJ_4; 1454 B->ops->solvetranspose = MatSolve_SeqSBAIJ_4; 1455 B->ops->mult = MatMult_SeqSBAIJ_4; 1456 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1457 B->ops->multtranspose = MatMult_SeqSBAIJ_4; 1458 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_4; 1459 break; 1460 case 5: 1461 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1462 B->ops->solve = MatSolve_SeqSBAIJ_5; 1463 B->ops->solvetranspose = MatSolve_SeqSBAIJ_5; 1464 B->ops->mult = MatMult_SeqSBAIJ_5; 1465 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1466 B->ops->multtranspose = MatMult_SeqSBAIJ_5; 1467 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_5; 1468 break; 1469 case 6: 1470 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1471 B->ops->solve = MatSolve_SeqSBAIJ_6; 1472 B->ops->solvetranspose = MatSolve_SeqSBAIJ_6; 1473 B->ops->mult = MatMult_SeqSBAIJ_6; 1474 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1475 B->ops->multtranspose = MatMult_SeqSBAIJ_6; 1476 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_6; 1477 break; 1478 case 7: 1479 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1480 B->ops->solve = MatSolve_SeqSBAIJ_7; 1481 B->ops->solvetranspose = MatSolve_SeqSBAIJ_7; 1482 B->ops->mult = MatMult_SeqSBAIJ_7; 1483 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1484 B->ops->multtranspose = MatMult_SeqSBAIJ_7; 1485 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_7; 1486 break; 1487 } 1488 } 1489 1490 b->mbs = mbs; 1491 b->nbs = mbs; 1492 if (!skipallocation) { 1493 /* b->ilen will count nonzeros in each block row so far. */ 1494 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 1495 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1496 if (!nnz) { 1497 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1498 else if (nz <= 0) nz = 1; 1499 for (i=0; i<mbs; i++) { 1500 b->imax[i] = nz; 1501 } 1502 nz = nz*mbs; /* total nz */ 1503 } else { 1504 nz = 0; 1505 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1506 } 1507 /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1508 1509 /* allocate the matrix space */ 1510 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr); 1511 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1512 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1513 b->singlemalloc = PETSC_TRUE; 1514 1515 /* pointer to beginning of each row */ 1516 b->i[0] = 0; 1517 for (i=1; i<mbs+1; i++) { 1518 b->i[i] = b->i[i-1] + b->imax[i-1]; 1519 } 1520 b->free_a = PETSC_TRUE; 1521 b->free_ij = PETSC_TRUE; 1522 } else { 1523 b->free_a = PETSC_FALSE; 1524 b->free_ij = PETSC_FALSE; 1525 } 1526 1527 B->rmap.bs = bs; 1528 b->bs2 = bs2; 1529 b->nz = 0; 1530 b->maxnz = nz*bs2; 1531 1532 b->inew = 0; 1533 b->jnew = 0; 1534 b->anew = 0; 1535 b->a2anew = 0; 1536 b->permute = PETSC_FALSE; 1537 PetscFunctionReturn(0); 1538 } 1539 EXTERN_C_END 1540 1541 EXTERN_C_BEGIN 1542 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 1543 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqBAIJ(Mat, MatType,MatReuse,Mat*); 1544 EXTERN_C_END 1545 1546 /*MC 1547 MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices, 1548 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1549 1550 Options Database Keys: 1551 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions() 1552 1553 Level: beginner 1554 1555 .seealso: MatCreateSeqSBAIJ 1556 M*/ 1557 1558 EXTERN_C_BEGIN 1559 #undef __FUNCT__ 1560 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1561 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqSBAIJ(Mat B) 1562 { 1563 Mat_SeqSBAIJ *b; 1564 PetscErrorCode ierr; 1565 PetscMPIInt size; 1566 PetscTruth flg; 1567 1568 PetscFunctionBegin; 1569 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1570 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1571 1572 ierr = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1573 B->data = (void*)b; 1574 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1575 B->ops->destroy = MatDestroy_SeqSBAIJ; 1576 B->ops->view = MatView_SeqSBAIJ; 1577 B->factor = 0; 1578 B->mapping = 0; 1579 b->row = 0; 1580 b->icol = 0; 1581 b->reallocs = 0; 1582 b->saved_values = 0; 1583 1584 1585 b->sorted = PETSC_FALSE; 1586 b->roworiented = PETSC_TRUE; 1587 b->nonew = 0; 1588 b->diag = 0; 1589 b->solve_work = 0; 1590 b->mult_work = 0; 1591 B->spptr = 0; 1592 b->keepzeroedrows = PETSC_FALSE; 1593 b->xtoy = 0; 1594 b->XtoY = 0; 1595 1596 b->inew = 0; 1597 b->jnew = 0; 1598 b->anew = 0; 1599 b->a2anew = 0; 1600 b->permute = PETSC_FALSE; 1601 1602 b->ignore_ltriangular = PETSC_FALSE; 1603 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_ignore_lower_triangular",&flg);CHKERRQ(ierr); 1604 if (flg) b->ignore_ltriangular = PETSC_TRUE; 1605 1606 b->getrow_utriangular = PETSC_FALSE; 1607 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_getrow_uppertriangular",&flg);CHKERRQ(ierr); 1608 if (flg) b->getrow_utriangular = PETSC_TRUE; 1609 1610 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1611 "MatStoreValues_SeqSBAIJ", 1612 MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1613 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1614 "MatRetrieveValues_SeqSBAIJ", 1615 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1616 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1617 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1618 MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1619 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C", 1620 "MatConvert_SeqSBAIJ_SeqAIJ", 1621 MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr); 1622 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C", 1623 "MatConvert_SeqSBAIJ_SeqBAIJ", 1624 MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr); 1625 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C", 1626 "MatSeqSBAIJSetPreallocation_SeqSBAIJ", 1627 MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr); 1628 1629 B->symmetric = PETSC_TRUE; 1630 B->structurally_symmetric = PETSC_TRUE; 1631 B->symmetric_set = PETSC_TRUE; 1632 B->structurally_symmetric_set = PETSC_TRUE; 1633 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQSBAIJ);CHKERRQ(ierr); 1634 PetscFunctionReturn(0); 1635 } 1636 EXTERN_C_END 1637 1638 #undef __FUNCT__ 1639 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1640 /*@C 1641 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1642 compressed row) format. For good matrix assembly performance the 1643 user should preallocate the matrix storage by setting the parameter nz 1644 (or the array nnz). By setting these parameters accurately, performance 1645 during matrix assembly can be increased by more than a factor of 50. 1646 1647 Collective on Mat 1648 1649 Input Parameters: 1650 + A - the symmetric matrix 1651 . bs - size of block 1652 . nz - number of block nonzeros per block row (same for all rows) 1653 - nnz - array containing the number of block nonzeros in the upper triangular plus 1654 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1655 1656 Options Database Keys: 1657 . -mat_no_unroll - uses code that does not unroll the loops in the 1658 block calculations (much slower) 1659 . -mat_block_size - size of the blocks to use 1660 1661 Level: intermediate 1662 1663 Notes: 1664 Specify the preallocated storage with either nz or nnz (not both). 1665 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1666 allocation. For additional details, see the users manual chapter on 1667 matrices. 1668 1669 If the nnz parameter is given then the nz parameter is ignored 1670 1671 1672 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1673 @*/ 1674 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 1675 { 1676 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 1677 1678 PetscFunctionBegin; 1679 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1680 if (f) { 1681 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 1682 } 1683 PetscFunctionReturn(0); 1684 } 1685 1686 #undef __FUNCT__ 1687 #define __FUNCT__ "MatCreateSeqSBAIJ" 1688 /*@C 1689 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1690 compressed row) format. For good matrix assembly performance the 1691 user should preallocate the matrix storage by setting the parameter nz 1692 (or the array nnz). By setting these parameters accurately, performance 1693 during matrix assembly can be increased by more than a factor of 50. 1694 1695 Collective on MPI_Comm 1696 1697 Input Parameters: 1698 + comm - MPI communicator, set to PETSC_COMM_SELF 1699 . bs - size of block 1700 . m - number of rows, or number of columns 1701 . nz - number of block nonzeros per block row (same for all rows) 1702 - nnz - array containing the number of block nonzeros in the upper triangular plus 1703 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1704 1705 Output Parameter: 1706 . A - the symmetric matrix 1707 1708 Options Database Keys: 1709 . -mat_no_unroll - uses code that does not unroll the loops in the 1710 block calculations (much slower) 1711 . -mat_block_size - size of the blocks to use 1712 1713 Level: intermediate 1714 1715 Notes: 1716 1717 Specify the preallocated storage with either nz or nnz (not both). 1718 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1719 allocation. For additional details, see the users manual chapter on 1720 matrices. 1721 1722 If the nnz parameter is given then the nz parameter is ignored 1723 1724 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1725 @*/ 1726 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 1727 { 1728 PetscErrorCode ierr; 1729 1730 PetscFunctionBegin; 1731 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1732 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 1733 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1734 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 1735 PetscFunctionReturn(0); 1736 } 1737 1738 #undef __FUNCT__ 1739 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1740 PetscErrorCode MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1741 { 1742 Mat C; 1743 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1744 PetscErrorCode ierr; 1745 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 1746 1747 PetscFunctionBegin; 1748 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1749 1750 *B = 0; 1751 ierr = MatCreate(A->comm,&C);CHKERRQ(ierr); 1752 ierr = MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr); 1753 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 1754 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1755 c = (Mat_SeqSBAIJ*)C->data; 1756 1757 C->preallocated = PETSC_TRUE; 1758 C->factor = A->factor; 1759 c->row = 0; 1760 c->icol = 0; 1761 c->saved_values = 0; 1762 c->keepzeroedrows = a->keepzeroedrows; 1763 C->assembled = PETSC_TRUE; 1764 1765 ierr = PetscMapCopy(A->comm,&A->rmap,&C->rmap);CHKERRQ(ierr); 1766 ierr = PetscMapCopy(A->comm,&A->cmap,&C->cmap);CHKERRQ(ierr); 1767 c->bs2 = a->bs2; 1768 c->mbs = a->mbs; 1769 c->nbs = a->nbs; 1770 1771 ierr = PetscMalloc2((mbs+1),PetscInt,&c->imax,(mbs+1),PetscInt,&c->ilen);CHKERRQ(ierr); 1772 for (i=0; i<mbs; i++) { 1773 c->imax[i] = a->imax[i]; 1774 c->ilen[i] = a->ilen[i]; 1775 } 1776 1777 /* allocate the matrix space */ 1778 ierr = PetscMalloc3(bs2*nz,MatScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 1779 c->singlemalloc = PETSC_TRUE; 1780 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1781 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt)));CHKERRQ(ierr); 1782 if (mbs > 0) { 1783 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1784 if (cpvalues == MAT_COPY_VALUES) { 1785 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1786 } else { 1787 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1788 } 1789 } 1790 1791 c->sorted = a->sorted; 1792 c->roworiented = a->roworiented; 1793 c->nonew = a->nonew; 1794 1795 if (a->diag) { 1796 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 1797 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1798 for (i=0; i<mbs; i++) { 1799 c->diag[i] = a->diag[i]; 1800 } 1801 } else c->diag = 0; 1802 c->nz = a->nz; 1803 c->maxnz = a->maxnz; 1804 c->solve_work = 0; 1805 c->mult_work = 0; 1806 c->free_a = PETSC_TRUE; 1807 c->free_ij = PETSC_TRUE; 1808 *B = C; 1809 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1810 PetscFunctionReturn(0); 1811 } 1812 1813 #undef __FUNCT__ 1814 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1815 PetscErrorCode MatLoad_SeqSBAIJ(PetscViewer viewer, MatType type,Mat *A) 1816 { 1817 Mat_SeqSBAIJ *a; 1818 Mat B; 1819 PetscErrorCode ierr; 1820 int fd; 1821 PetscMPIInt size; 1822 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 1823 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount; 1824 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 1825 PetscInt *masked,nmask,tmp,bs2,ishift; 1826 PetscScalar *aa; 1827 MPI_Comm comm = ((PetscObject)viewer)->comm; 1828 1829 PetscFunctionBegin; 1830 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1831 bs2 = bs*bs; 1832 1833 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1834 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1835 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1836 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1837 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1838 M = header[1]; N = header[2]; nz = header[3]; 1839 1840 if (header[3] < 0) { 1841 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1842 } 1843 1844 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1845 1846 /* 1847 This code adds extra rows to make sure the number of rows is 1848 divisible by the blocksize 1849 */ 1850 mbs = M/bs; 1851 extra_rows = bs - M + bs*(mbs); 1852 if (extra_rows == bs) extra_rows = 0; 1853 else mbs++; 1854 if (extra_rows) { 1855 ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 1856 } 1857 1858 /* read in row lengths */ 1859 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1860 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1861 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1862 1863 /* read in column indices */ 1864 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1865 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1866 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1867 1868 /* loop over row lengths determining block row lengths */ 1869 ierr = PetscMalloc(mbs*sizeof(PetscInt),&s_browlengths);CHKERRQ(ierr); 1870 ierr = PetscMemzero(s_browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1871 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 1872 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1873 masked = mask + mbs; 1874 rowcount = 0; nzcount = 0; 1875 for (i=0; i<mbs; i++) { 1876 nmask = 0; 1877 for (j=0; j<bs; j++) { 1878 kmax = rowlengths[rowcount]; 1879 for (k=0; k<kmax; k++) { 1880 tmp = jj[nzcount++]/bs; /* block col. index */ 1881 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1882 } 1883 rowcount++; 1884 } 1885 s_browlengths[i] += nmask; 1886 1887 /* zero out the mask elements we set */ 1888 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1889 } 1890 1891 /* create our matrix */ 1892 ierr = MatCreate(comm,&B);CHKERRQ(ierr); 1893 ierr = MatSetSizes(B,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 1894 ierr = MatSetType(B,type);CHKERRQ(ierr); 1895 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(B,bs,0,s_browlengths);CHKERRQ(ierr); 1896 a = (Mat_SeqSBAIJ*)B->data; 1897 1898 /* set matrix "i" values */ 1899 a->i[0] = 0; 1900 for (i=1; i<= mbs; i++) { 1901 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1902 a->ilen[i-1] = s_browlengths[i-1]; 1903 } 1904 a->nz = a->i[mbs]; 1905 1906 /* read in nonzero values */ 1907 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1908 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1909 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1910 1911 /* set "a" and "j" values into matrix */ 1912 nzcount = 0; jcount = 0; 1913 for (i=0; i<mbs; i++) { 1914 nzcountb = nzcount; 1915 nmask = 0; 1916 for (j=0; j<bs; j++) { 1917 kmax = rowlengths[i*bs+j]; 1918 for (k=0; k<kmax; k++) { 1919 tmp = jj[nzcount++]/bs; /* block col. index */ 1920 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1921 } 1922 } 1923 /* sort the masked values */ 1924 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1925 1926 /* set "j" values into matrix */ 1927 maskcount = 1; 1928 for (j=0; j<nmask; j++) { 1929 a->j[jcount++] = masked[j]; 1930 mask[masked[j]] = maskcount++; 1931 } 1932 1933 /* set "a" values into matrix */ 1934 ishift = bs2*a->i[i]; 1935 for (j=0; j<bs; j++) { 1936 kmax = rowlengths[i*bs+j]; 1937 for (k=0; k<kmax; k++) { 1938 tmp = jj[nzcountb]/bs ; /* block col. index */ 1939 if (tmp >= i){ 1940 block = mask[tmp] - 1; 1941 point = jj[nzcountb] - bs*tmp; 1942 idx = ishift + bs2*block + j + bs*point; 1943 a->a[idx] = aa[nzcountb]; 1944 } 1945 nzcountb++; 1946 } 1947 } 1948 /* zero out the mask elements we set */ 1949 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1950 } 1951 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1952 1953 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1954 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1955 ierr = PetscFree(aa);CHKERRQ(ierr); 1956 ierr = PetscFree(jj);CHKERRQ(ierr); 1957 ierr = PetscFree(mask);CHKERRQ(ierr); 1958 1959 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1960 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1961 ierr = MatView_Private(B);CHKERRQ(ierr); 1962 *A = B; 1963 PetscFunctionReturn(0); 1964 } 1965 1966 #undef __FUNCT__ 1967 #define __FUNCT__ "MatRelax_SeqSBAIJ" 1968 PetscErrorCode MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1969 { 1970 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1971 MatScalar *aa=a->a,*v,*v1; 1972 PetscScalar *x,*b,*t,sum,d; 1973 PetscErrorCode ierr; 1974 PetscInt m=a->mbs,bs=A->rmap.bs,*ai=a->i,*aj=a->j; 1975 PetscInt nz,nz1,*vj,*vj1,i; 1976 1977 PetscFunctionBegin; 1978 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 1979 1980 its = its*lits; 1981 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1982 1983 if (bs > 1) 1984 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 1985 1986 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1987 if (xx != bb) { 1988 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1989 } else { 1990 b = x; 1991 } 1992 1993 if (!a->relax_work) { 1994 ierr = PetscMalloc(m*sizeof(PetscScalar),&a->relax_work);CHKERRQ(ierr); 1995 } 1996 t = a->relax_work; 1997 1998 if (flag & SOR_ZERO_INITIAL_GUESS) { 1999 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2000 ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr); 2001 2002 for (i=0; i<m; i++){ 2003 d = *(aa + ai[i]); /* diag[i] */ 2004 v = aa + ai[i] + 1; 2005 vj = aj + ai[i] + 1; 2006 nz = ai[i+1] - ai[i] - 1; 2007 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2008 x[i] = omega*t[i]/d; 2009 while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */ 2010 } 2011 } 2012 2013 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2014 if (!(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP)){ 2015 t = b; 2016 } 2017 2018 for (i=m-1; i>=0; i--){ 2019 d = *(aa + ai[i]); 2020 v = aa + ai[i] + 1; 2021 vj = aj + ai[i] + 1; 2022 nz = ai[i+1] - ai[i] - 1; 2023 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2024 sum = t[i]; 2025 while (nz--) sum -= x[*vj++]*(*v++); 2026 x[i] = (1-omega)*x[i] + omega*sum/d; 2027 } 2028 t = a->relax_work; 2029 } 2030 its--; 2031 } 2032 2033 while (its--) { 2034 /* 2035 forward sweep: 2036 for i=0,...,m-1: 2037 sum[i] = (b[i] - U(i,:)x )/d[i]; 2038 x[i] = (1-omega)x[i] + omega*sum[i]; 2039 b = b - x[i]*U^T(i,:); 2040 2041 */ 2042 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2043 ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr); 2044 2045 for (i=0; i<m; i++){ 2046 d = *(aa + ai[i]); /* diag[i] */ 2047 v = aa + ai[i] + 1; v1=v; 2048 vj = aj + ai[i] + 1; vj1=vj; 2049 nz = ai[i+1] - ai[i] - 1; nz1=nz; 2050 sum = t[i]; 2051 ierr = PetscLogFlops(4*nz-2);CHKERRQ(ierr); 2052 while (nz1--) sum -= (*v1++)*x[*vj1++]; 2053 x[i] = (1-omega)*x[i] + omega*sum/d; 2054 while (nz--) t[*vj++] -= x[i]*(*v++); 2055 } 2056 } 2057 2058 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2059 /* 2060 backward sweep: 2061 b = b - x[i]*U^T(i,:), i=0,...,n-2 2062 for i=m-1,...,0: 2063 sum[i] = (b[i] - U(i,:)x )/d[i]; 2064 x[i] = (1-omega)x[i] + omega*sum[i]; 2065 */ 2066 /* if there was a forward sweep done above then I thing the next two for loops are not needed */ 2067 ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr); 2068 2069 for (i=0; i<m-1; i++){ /* update rhs */ 2070 v = aa + ai[i] + 1; 2071 vj = aj + ai[i] + 1; 2072 nz = ai[i+1] - ai[i] - 1; 2073 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2074 while (nz--) t[*vj++] -= x[i]*(*v++); 2075 } 2076 for (i=m-1; i>=0; i--){ 2077 d = *(aa + ai[i]); 2078 v = aa + ai[i] + 1; 2079 vj = aj + ai[i] + 1; 2080 nz = ai[i+1] - ai[i] - 1; 2081 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2082 sum = t[i]; 2083 while (nz--) sum -= x[*vj++]*(*v++); 2084 x[i] = (1-omega)*x[i] + omega*sum/d; 2085 } 2086 } 2087 } 2088 2089 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2090 if (bb != xx) { 2091 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2092 } 2093 PetscFunctionReturn(0); 2094 } 2095 2096 #undef __FUNCT__ 2097 #define __FUNCT__ "MatCreateSeqSBAIJWithArrays" 2098 /*@ 2099 MatCreateSeqSBAIJWithArrays - Creates an sequential SBAIJ matrix using matrix elements 2100 (upper triangular entries in CSR format) provided by the user. 2101 2102 Collective on MPI_Comm 2103 2104 Input Parameters: 2105 + comm - must be an MPI communicator of size 1 2106 . bs - size of block 2107 . m - number of rows 2108 . n - number of columns 2109 . i - row indices 2110 . j - column indices 2111 - a - matrix values 2112 2113 Output Parameter: 2114 . mat - the matrix 2115 2116 Level: intermediate 2117 2118 Notes: 2119 The i, j, and a arrays are not copied by this routine, the user must free these arrays 2120 once the matrix is destroyed 2121 2122 You cannot set new nonzero locations into this matrix, that will generate an error. 2123 2124 The i and j indices are 0 based 2125 2126 .seealso: MatCreate(), MatCreateMPISBAIJ(), MatCreateSeqSBAIJ() 2127 2128 @*/ 2129 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqSBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 2130 { 2131 PetscErrorCode ierr; 2132 PetscInt ii; 2133 Mat_SeqSBAIJ *sbaij; 2134 2135 PetscFunctionBegin; 2136 if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 2137 if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2138 2139 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 2140 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 2141 ierr = MatSetType(*mat,MATSEQSBAIJ);CHKERRQ(ierr); 2142 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 2143 sbaij = (Mat_SeqSBAIJ*)(*mat)->data; 2144 ierr = PetscMalloc2(m,PetscInt,&sbaij->imax,m,PetscInt,&sbaij->ilen);CHKERRQ(ierr); 2145 2146 sbaij->i = i; 2147 sbaij->j = j; 2148 sbaij->a = a; 2149 sbaij->singlemalloc = PETSC_FALSE; 2150 sbaij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 2151 sbaij->free_a = PETSC_FALSE; 2152 sbaij->free_ij = PETSC_FALSE; 2153 2154 for (ii=0; ii<m; ii++) { 2155 sbaij->ilen[ii] = sbaij->imax[ii] = i[ii+1] - i[ii]; 2156 #if defined(PETSC_USE_DEBUG) 2157 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 2158 #endif 2159 } 2160 #if defined(PETSC_USE_DEBUG) 2161 for (ii=0; ii<sbaij->i[m]; ii++) { 2162 if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 2163 if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 2164 } 2165 #endif 2166 2167 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2168 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2169 PetscFunctionReturn(0); 2170 } 2171 2172 2173 2174 2175 2176