1 /*$Id: baij.c,v 1.236 2001/07/12 23:47:46 buschelm Exp buschelm $*/ 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "petscsys.h" /*I "petscmat.h" I*/ 8 #include "src/mat/impls/baij/seq/baij.h" 9 #include "src/vec/vecimpl.h" 10 #include "src/inline/spops.h" 11 12 /* UGLY, ugly, ugly 13 When MatScalar == Scalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 14 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 15 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 16 converts the entries into single precision and then calls ..._MatScalar() to put them 17 into the single precision data structures. 18 */ 19 #if defined(PETSC_USE_MAT_SINGLE) 20 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 21 #else 22 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 23 #endif 24 25 #define CHUNKSIZE 10 26 27 /* 28 Checks for missing diagonals 29 */ 30 #undef __FUNCT__ 31 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 32 int MatMissingDiagonal_SeqBAIJ(Mat A) 33 { 34 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 35 int *diag,*jj = a->j,i,ierr; 36 37 PetscFunctionBegin; 38 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 39 diag = a->diag; 40 for (i=0; i<a->mbs; i++) { 41 if (jj[diag[i]] != i) { 42 SETERRQ1(1,"Matrix is missing diagonal number %d",i); 43 } 44 } 45 PetscFunctionReturn(0); 46 } 47 48 #undef __FUNCT__ 49 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 50 int MatMarkDiagonal_SeqBAIJ(Mat A) 51 { 52 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 53 int i,j,*diag,m = a->mbs,ierr; 54 55 PetscFunctionBegin; 56 if (a->diag) PetscFunctionReturn(0); 57 58 ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr); 59 PetscLogObjectMemory(A,(m+1)*sizeof(int)); 60 for (i=0; i<m; i++) { 61 diag[i] = a->i[i+1]; 62 for (j=a->i[i]; j<a->i[i+1]; j++) { 63 if (a->j[j] == i) { 64 diag[i] = j; 65 break; 66 } 67 } 68 } 69 a->diag = diag; 70 PetscFunctionReturn(0); 71 } 72 73 74 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 75 76 #undef __FUNCT__ 77 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 78 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 79 { 80 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 81 int ierr,n = a->mbs,i; 82 83 PetscFunctionBegin; 84 *nn = n; 85 if (!ia) PetscFunctionReturn(0); 86 if (symmetric) { 87 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 88 } else if (oshift == 1) { 89 /* temporarily add 1 to i and j indices */ 90 int nz = a->i[n]; 91 for (i=0; i<nz; i++) a->j[i]++; 92 for (i=0; i<n+1; i++) a->i[i]++; 93 *ia = a->i; *ja = a->j; 94 } else { 95 *ia = a->i; *ja = a->j; 96 } 97 98 PetscFunctionReturn(0); 99 } 100 101 #undef __FUNCT__ 102 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 103 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 104 { 105 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 106 int i,n = a->mbs,ierr; 107 108 PetscFunctionBegin; 109 if (!ia) PetscFunctionReturn(0); 110 if (symmetric) { 111 ierr = PetscFree(*ia);CHKERRQ(ierr); 112 ierr = PetscFree(*ja);CHKERRQ(ierr); 113 } else if (oshift == 1) { 114 int nz = a->i[n]-1; 115 for (i=0; i<nz; i++) a->j[i]--; 116 for (i=0; i<n+1; i++) a->i[i]--; 117 } 118 PetscFunctionReturn(0); 119 } 120 121 #undef __FUNCT__ 122 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ" 123 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs) 124 { 125 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 126 127 PetscFunctionBegin; 128 *bs = baij->bs; 129 PetscFunctionReturn(0); 130 } 131 132 133 #undef __FUNCT__ 134 #define __FUNCT__ "MatDestroy_SeqBAIJ" 135 int MatDestroy_SeqBAIJ(Mat A) 136 { 137 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 138 int ierr; 139 140 PetscFunctionBegin; 141 #if defined(PETSC_USE_LOG) 142 PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz); 143 #endif 144 ierr = PetscFree(a->a);CHKERRQ(ierr); 145 if (!a->singlemalloc) { 146 ierr = PetscFree(a->i);CHKERRQ(ierr); 147 ierr = PetscFree(a->j);CHKERRQ(ierr); 148 } 149 if (a->row) { 150 ierr = ISDestroy(a->row);CHKERRQ(ierr); 151 } 152 if (a->col) { 153 ierr = ISDestroy(a->col);CHKERRQ(ierr); 154 } 155 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 156 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 157 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 158 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 159 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 160 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 161 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 162 #if defined(PETSC_USE_MAT_SINGLE) 163 if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);} 164 #endif 165 ierr = PetscFree(a);CHKERRQ(ierr); 166 PetscFunctionReturn(0); 167 } 168 169 #undef __FUNCT__ 170 #define __FUNCT__ "MatSetOption_SeqBAIJ" 171 int MatSetOption_SeqBAIJ(Mat A,MatOption op) 172 { 173 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 174 175 PetscFunctionBegin; 176 switch (op) { 177 case MAT_ROW_ORIENTED: 178 a->roworiented = PETSC_TRUE; 179 break; 180 case MAT_COLUMN_ORIENTED: 181 a->roworiented = PETSC_FALSE; 182 break; 183 case MAT_COLUMNS_SORTED: 184 a->sorted = PETSC_TRUE; 185 break; 186 case MAT_COLUMNS_UNSORTED: 187 a->sorted = PETSC_FALSE; 188 break; 189 case MAT_KEEP_ZEROED_ROWS: 190 a->keepzeroedrows = PETSC_TRUE; 191 break; 192 case MAT_NO_NEW_NONZERO_LOCATIONS: 193 a->nonew = 1; 194 break; 195 case MAT_NEW_NONZERO_LOCATION_ERR: 196 a->nonew = -1; 197 break; 198 case MAT_NEW_NONZERO_ALLOCATION_ERR: 199 a->nonew = -2; 200 break; 201 case MAT_YES_NEW_NONZERO_LOCATIONS: 202 a->nonew = 0; 203 break; 204 case MAT_ROWS_SORTED: 205 case MAT_ROWS_UNSORTED: 206 case MAT_YES_NEW_DIAGONALS: 207 case MAT_IGNORE_OFF_PROC_ENTRIES: 208 case MAT_USE_HASH_TABLE: 209 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n"); 210 break; 211 case MAT_NO_NEW_DIAGONALS: 212 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 213 break; 214 case MAT_USE_SINGLE_PRECISION_SOLVES: 215 if (a->bs==4) { 216 PetscTruth sse_enabled=PETSC_FALSE,single_prec,flg; 217 int ierr; 218 ierr = PetscSSEIsEnabled(&sse_enabled);CHKERRQ(ierr); 219 ierr = PetscOptionsGetLogical(PETSC_NULL,"-mat_single_precision_solves",&single_prec,&flg);CHKERRQ(ierr); 220 if (sse_enabled && flg && single_prec) { 221 a->single_precision_solves = PETSC_TRUE; 222 A->ops->solve = MatSolve_SeqBAIJ_Update; 223 A->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 224 break; 225 } 226 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n"); 227 } 228 break; 229 default: 230 SETERRQ(PETSC_ERR_SUP,"unknown option"); 231 } 232 PetscFunctionReturn(0); 233 } 234 235 #undef __FUNCT__ 236 #define __FUNCT__ "MatGetOwnershipRange_SeqBAIJ" 237 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 238 { 239 PetscFunctionBegin; 240 if (m) *m = 0; 241 if (n) *n = A->m; 242 PetscFunctionReturn(0); 243 } 244 245 #undef __FUNCT__ 246 #define __FUNCT__ "MatGetRow_SeqBAIJ" 247 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 248 { 249 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 250 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr; 251 MatScalar *aa,*aa_i; 252 Scalar *v_i; 253 254 PetscFunctionBegin; 255 bs = a->bs; 256 ai = a->i; 257 aj = a->j; 258 aa = a->a; 259 bs2 = a->bs2; 260 261 if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range"); 262 263 bn = row/bs; /* Block number */ 264 bp = row % bs; /* Block Position */ 265 M = ai[bn+1] - ai[bn]; 266 *nz = bs*M; 267 268 if (v) { 269 *v = 0; 270 if (*nz) { 271 ierr = PetscMalloc((*nz)*sizeof(Scalar),v);CHKERRQ(ierr); 272 for (i=0; i<M; i++) { /* for each block in the block row */ 273 v_i = *v + i*bs; 274 aa_i = aa + bs2*(ai[bn] + i); 275 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 276 } 277 } 278 } 279 280 if (idx) { 281 *idx = 0; 282 if (*nz) { 283 ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr); 284 for (i=0; i<M; i++) { /* for each block in the block row */ 285 idx_i = *idx + i*bs; 286 itmp = bs*aj[ai[bn] + i]; 287 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 288 } 289 } 290 } 291 PetscFunctionReturn(0); 292 } 293 294 #undef __FUNCT__ 295 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 296 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 297 { 298 int ierr; 299 300 PetscFunctionBegin; 301 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 302 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 303 PetscFunctionReturn(0); 304 } 305 306 #undef __FUNCT__ 307 #define __FUNCT__ "MatTranspose_SeqBAIJ" 308 int MatTranspose_SeqBAIJ(Mat A,Mat *B) 309 { 310 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 311 Mat C; 312 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 313 int *rows,*cols,bs2=a->bs2; 314 Scalar *array; 315 316 PetscFunctionBegin; 317 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 318 ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr); 319 ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr); 320 321 #if defined(PETSC_USE_MAT_SINGLE) 322 ierr = PetscMalloc(a->bs2*a->nz*sizeof(Scalar),&array);CHKERRQ(ierr); 323 for (i=0; i<a->bs2*a->nz; i++) array[i] = (Scalar)a->a[i]; 324 #else 325 array = a->a; 326 #endif 327 328 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 329 ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr); 330 ierr = PetscFree(col);CHKERRQ(ierr); 331 ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr); 332 cols = rows + bs; 333 for (i=0; i<mbs; i++) { 334 cols[0] = i*bs; 335 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 336 len = ai[i+1] - ai[i]; 337 for (j=0; j<len; j++) { 338 rows[0] = (*aj++)*bs; 339 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 340 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 341 array += bs2; 342 } 343 } 344 ierr = PetscFree(rows);CHKERRQ(ierr); 345 #if defined(PETSC_USE_MAT_SINGLE) 346 ierr = PetscFree(array); 347 #endif 348 349 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 350 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 351 352 if (B) { 353 *B = C; 354 } else { 355 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 356 } 357 PetscFunctionReturn(0); 358 } 359 360 #undef __FUNCT__ 361 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 362 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 363 { 364 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 365 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 366 Scalar *aa; 367 FILE *file; 368 369 PetscFunctionBegin; 370 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 371 ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 372 col_lens[0] = MAT_COOKIE; 373 374 col_lens[1] = A->m; 375 col_lens[2] = A->n; 376 col_lens[3] = a->nz*bs2; 377 378 /* store lengths of each row and write (including header) to file */ 379 count = 0; 380 for (i=0; i<a->mbs; i++) { 381 for (j=0; j<bs; j++) { 382 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 383 } 384 } 385 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 386 ierr = PetscFree(col_lens);CHKERRQ(ierr); 387 388 /* store column indices (zero start index) */ 389 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 390 count = 0; 391 for (i=0; i<a->mbs; i++) { 392 for (j=0; j<bs; j++) { 393 for (k=a->i[i]; k<a->i[i+1]; k++) { 394 for (l=0; l<bs; l++) { 395 jj[count++] = bs*a->j[k] + l; 396 } 397 } 398 } 399 } 400 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr); 401 ierr = PetscFree(jj);CHKERRQ(ierr); 402 403 /* store nonzero values */ 404 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(Scalar),&aa);CHKERRQ(ierr); 405 count = 0; 406 for (i=0; i<a->mbs; i++) { 407 for (j=0; j<bs; j++) { 408 for (k=a->i[i]; k<a->i[i+1]; k++) { 409 for (l=0; l<bs; l++) { 410 aa[count++] = a->a[bs2*k + l*bs + j]; 411 } 412 } 413 } 414 } 415 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr); 416 ierr = PetscFree(aa);CHKERRQ(ierr); 417 418 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 419 if (file) { 420 fprintf(file,"-matload_block_size %d\n",a->bs); 421 } 422 PetscFunctionReturn(0); 423 } 424 425 #undef __FUNCT__ 426 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 427 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 428 { 429 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 430 int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 431 PetscViewerFormat format; 432 433 PetscFunctionBegin; 434 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 435 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_LONG) { 436 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 437 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 438 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 439 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 440 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 441 for (i=0; i<a->mbs; i++) { 442 for (j=0; j<bs; j++) { 443 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 444 for (k=a->i[i]; k<a->i[i+1]; k++) { 445 for (l=0; l<bs; l++) { 446 #if defined(PETSC_USE_COMPLEX) 447 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 448 ierr = PetscViewerASCIIPrintf(viewer," %d %g + %gi",bs*a->j[k]+l, 449 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 450 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 451 ierr = PetscViewerASCIIPrintf(viewer," %d %g - %gi",bs*a->j[k]+l, 452 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 453 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 454 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 455 } 456 #else 457 if (a->a[bs2*k + l*bs + j] != 0.0) { 458 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 459 } 460 #endif 461 } 462 } 463 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 464 } 465 } 466 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 467 } else { 468 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 469 for (i=0; i<a->mbs; i++) { 470 for (j=0; j<bs; j++) { 471 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 472 for (k=a->i[i]; k<a->i[i+1]; k++) { 473 for (l=0; l<bs; l++) { 474 #if defined(PETSC_USE_COMPLEX) 475 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 476 ierr = PetscViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l, 477 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 478 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 479 ierr = PetscViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l, 480 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 481 } else { 482 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 483 } 484 #else 485 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 486 #endif 487 } 488 } 489 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 490 } 491 } 492 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 493 } 494 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 495 PetscFunctionReturn(0); 496 } 497 498 #undef __FUNCT__ 499 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 500 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 501 { 502 Mat A = (Mat) Aa; 503 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 504 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2; 505 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 506 MatScalar *aa; 507 PetscViewer viewer; 508 509 PetscFunctionBegin; 510 511 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 512 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 513 514 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 515 516 /* loop over matrix elements drawing boxes */ 517 color = PETSC_DRAW_BLUE; 518 for (i=0,row=0; i<mbs; i++,row+=bs) { 519 for (j=a->i[i]; j<a->i[i+1]; j++) { 520 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 521 x_l = a->j[j]*bs; x_r = x_l + 1.0; 522 aa = a->a + j*bs2; 523 for (k=0; k<bs; k++) { 524 for (l=0; l<bs; l++) { 525 if (PetscRealPart(*aa++) >= 0.) continue; 526 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 527 } 528 } 529 } 530 } 531 color = PETSC_DRAW_CYAN; 532 for (i=0,row=0; i<mbs; i++,row+=bs) { 533 for (j=a->i[i]; j<a->i[i+1]; j++) { 534 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 535 x_l = a->j[j]*bs; x_r = x_l + 1.0; 536 aa = a->a + j*bs2; 537 for (k=0; k<bs; k++) { 538 for (l=0; l<bs; l++) { 539 if (PetscRealPart(*aa++) != 0.) continue; 540 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 541 } 542 } 543 } 544 } 545 546 color = PETSC_DRAW_RED; 547 for (i=0,row=0; i<mbs; i++,row+=bs) { 548 for (j=a->i[i]; j<a->i[i+1]; j++) { 549 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 550 x_l = a->j[j]*bs; x_r = x_l + 1.0; 551 aa = a->a + j*bs2; 552 for (k=0; k<bs; k++) { 553 for (l=0; l<bs; l++) { 554 if (PetscRealPart(*aa++) <= 0.) continue; 555 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 556 } 557 } 558 } 559 } 560 PetscFunctionReturn(0); 561 } 562 563 #undef __FUNCT__ 564 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 565 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 566 { 567 int ierr; 568 PetscReal xl,yl,xr,yr,w,h; 569 PetscDraw draw; 570 PetscTruth isnull; 571 572 PetscFunctionBegin; 573 574 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 575 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 576 577 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 578 xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0; 579 xr += w; yr += h; xl = -w; yl = -h; 580 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 581 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 582 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 583 PetscFunctionReturn(0); 584 } 585 586 #undef __FUNCT__ 587 #define __FUNCT__ "MatView_SeqBAIJ" 588 int MatView_SeqBAIJ(Mat A,PetscViewer viewer) 589 { 590 int ierr; 591 PetscTruth issocket,isascii,isbinary,isdraw; 592 593 PetscFunctionBegin; 594 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 595 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 596 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 597 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 598 if (issocket) { 599 SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported"); 600 } else if (isascii){ 601 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 602 } else if (isbinary) { 603 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 604 } else if (isdraw) { 605 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 606 } else { 607 SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name); 608 } 609 PetscFunctionReturn(0); 610 } 611 612 613 #undef __FUNCT__ 614 #define __FUNCT__ "MatGetValues_SeqBAIJ" 615 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 616 { 617 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 618 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 619 int *ai = a->i,*ailen = a->ilen; 620 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 621 MatScalar *ap,*aa = a->a,zero = 0.0; 622 623 PetscFunctionBegin; 624 for (k=0; k<m; k++) { /* loop over rows */ 625 row = im[k]; brow = row/bs; 626 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 627 if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 628 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 629 nrow = ailen[brow]; 630 for (l=0; l<n; l++) { /* loop over columns */ 631 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 632 if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 633 col = in[l] ; 634 bcol = col/bs; 635 cidx = col%bs; 636 ridx = row%bs; 637 high = nrow; 638 low = 0; /* assume unsorted */ 639 while (high-low > 5) { 640 t = (low+high)/2; 641 if (rp[t] > bcol) high = t; 642 else low = t; 643 } 644 for (i=low; i<high; i++) { 645 if (rp[i] > bcol) break; 646 if (rp[i] == bcol) { 647 *v++ = ap[bs2*i+bs*cidx+ridx]; 648 goto finished; 649 } 650 } 651 *v++ = zero; 652 finished:; 653 } 654 } 655 PetscFunctionReturn(0); 656 } 657 658 #if defined(PETSC_USE_MAT_SINGLE) 659 #undef __FUNCT__ 660 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 661 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 662 { 663 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 664 int ierr,i,N = m*n*b->bs2; 665 MatScalar *vsingle; 666 667 PetscFunctionBegin; 668 if (N > b->setvalueslen) { 669 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 670 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 671 b->setvalueslen = N; 672 } 673 vsingle = b->setvaluescopy; 674 for (i=0; i<N; i++) { 675 vsingle[i] = v[i]; 676 } 677 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 678 PetscFunctionReturn(0); 679 } 680 #endif 681 682 683 #undef __FUNCT__ 684 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 685 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,int *im,int n,int *in,MatScalar *v,InsertMode is) 686 { 687 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 688 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 689 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 690 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 691 PetscTruth roworiented=a->roworiented; 692 MatScalar *value = v,*ap,*aa = a->a,*bap; 693 694 PetscFunctionBegin; 695 if (roworiented) { 696 stepval = (n-1)*bs; 697 } else { 698 stepval = (m-1)*bs; 699 } 700 for (k=0; k<m; k++) { /* loop over added rows */ 701 row = im[k]; 702 if (row < 0) continue; 703 #if defined(PETSC_USE_BOPT_g) 704 if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 705 #endif 706 rp = aj + ai[row]; 707 ap = aa + bs2*ai[row]; 708 rmax = imax[row]; 709 nrow = ailen[row]; 710 low = 0; 711 for (l=0; l<n; l++) { /* loop over added columns */ 712 if (in[l] < 0) continue; 713 #if defined(PETSC_USE_BOPT_g) 714 if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 715 #endif 716 col = in[l]; 717 if (roworiented) { 718 value = v + k*(stepval+bs)*bs + l*bs; 719 } else { 720 value = v + l*(stepval+bs)*bs + k*bs; 721 } 722 if (!sorted) low = 0; high = nrow; 723 while (high-low > 7) { 724 t = (low+high)/2; 725 if (rp[t] > col) high = t; 726 else low = t; 727 } 728 for (i=low; i<high; i++) { 729 if (rp[i] > col) break; 730 if (rp[i] == col) { 731 bap = ap + bs2*i; 732 if (roworiented) { 733 if (is == ADD_VALUES) { 734 for (ii=0; ii<bs; ii++,value+=stepval) { 735 for (jj=ii; jj<bs2; jj+=bs) { 736 bap[jj] += *value++; 737 } 738 } 739 } else { 740 for (ii=0; ii<bs; ii++,value+=stepval) { 741 for (jj=ii; jj<bs2; jj+=bs) { 742 bap[jj] = *value++; 743 } 744 } 745 } 746 } else { 747 if (is == ADD_VALUES) { 748 for (ii=0; ii<bs; ii++,value+=stepval) { 749 for (jj=0; jj<bs; jj++) { 750 *bap++ += *value++; 751 } 752 } 753 } else { 754 for (ii=0; ii<bs; ii++,value+=stepval) { 755 for (jj=0; jj<bs; jj++) { 756 *bap++ = *value++; 757 } 758 } 759 } 760 } 761 goto noinsert2; 762 } 763 } 764 if (nonew == 1) goto noinsert2; 765 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 766 if (nrow >= rmax) { 767 /* there is no extra room in row, therefore enlarge */ 768 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 769 MatScalar *new_a; 770 771 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 772 773 /* malloc new storage space */ 774 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 775 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 776 new_j = (int*)(new_a + bs2*new_nz); 777 new_i = new_j + new_nz; 778 779 /* copy over old data into new slots */ 780 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 781 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 782 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 783 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 784 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 785 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 786 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 787 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 788 /* free up old matrix storage */ 789 ierr = PetscFree(a->a);CHKERRQ(ierr); 790 if (!a->singlemalloc) { 791 ierr = PetscFree(a->i);CHKERRQ(ierr); 792 ierr = PetscFree(a->j);CHKERRQ(ierr); 793 } 794 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 795 a->singlemalloc = PETSC_TRUE; 796 797 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 798 rmax = imax[row] = imax[row] + CHUNKSIZE; 799 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 800 a->maxnz += bs2*CHUNKSIZE; 801 a->reallocs++; 802 a->nz++; 803 } 804 N = nrow++ - 1; 805 /* shift up all the later entries in this row */ 806 for (ii=N; ii>=i; ii--) { 807 rp[ii+1] = rp[ii]; 808 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 809 } 810 if (N >= i) { 811 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 812 } 813 rp[i] = col; 814 bap = ap + bs2*i; 815 if (roworiented) { 816 for (ii=0; ii<bs; ii++,value+=stepval) { 817 for (jj=ii; jj<bs2; jj+=bs) { 818 bap[jj] = *value++; 819 } 820 } 821 } else { 822 for (ii=0; ii<bs; ii++,value+=stepval) { 823 for (jj=0; jj<bs; jj++) { 824 *bap++ = *value++; 825 } 826 } 827 } 828 noinsert2:; 829 low = i; 830 } 831 ailen[row] = nrow; 832 } 833 PetscFunctionReturn(0); 834 } 835 836 837 #undef __FUNCT__ 838 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 839 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 840 { 841 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 842 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 843 int m = A->m,*ip,N,*ailen = a->ilen; 844 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 845 MatScalar *aa = a->a,*ap; 846 847 PetscFunctionBegin; 848 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 849 850 if (m) rmax = ailen[0]; 851 for (i=1; i<mbs; i++) { 852 /* move each row back by the amount of empty slots (fshift) before it*/ 853 fshift += imax[i-1] - ailen[i-1]; 854 rmax = PetscMax(rmax,ailen[i]); 855 if (fshift) { 856 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 857 N = ailen[i]; 858 for (j=0; j<N; j++) { 859 ip[j-fshift] = ip[j]; 860 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 861 } 862 } 863 ai[i] = ai[i-1] + ailen[i-1]; 864 } 865 if (mbs) { 866 fshift += imax[mbs-1] - ailen[mbs-1]; 867 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 868 } 869 /* reset ilen and imax for each row */ 870 for (i=0; i<mbs; i++) { 871 ailen[i] = imax[i] = ai[i+1] - ai[i]; 872 } 873 a->nz = ai[mbs]; 874 875 /* diagonals may have moved, so kill the diagonal pointers */ 876 if (fshift && a->diag) { 877 ierr = PetscFree(a->diag);CHKERRQ(ierr); 878 PetscLogObjectMemory(A,-(m+1)*sizeof(int)); 879 a->diag = 0; 880 } 881 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 882 m,A->n,a->bs,fshift*bs2,a->nz*bs2); 883 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n", 884 a->reallocs); 885 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 886 a->reallocs = 0; 887 A->info.nz_unneeded = (PetscReal)fshift*bs2; 888 889 PetscFunctionReturn(0); 890 } 891 892 893 894 /* 895 This function returns an array of flags which indicate the locations of contiguous 896 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 897 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 898 Assume: sizes should be long enough to hold all the values. 899 */ 900 #undef __FUNCT__ 901 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 902 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 903 { 904 int i,j,k,row; 905 PetscTruth flg; 906 907 PetscFunctionBegin; 908 for (i=0,j=0; i<n; j++) { 909 row = idx[i]; 910 if (row%bs!=0) { /* Not the begining of a block */ 911 sizes[j] = 1; 912 i++; 913 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 914 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 915 i++; 916 } else { /* Begining of the block, so check if the complete block exists */ 917 flg = PETSC_TRUE; 918 for (k=1; k<bs; k++) { 919 if (row+k != idx[i+k]) { /* break in the block */ 920 flg = PETSC_FALSE; 921 break; 922 } 923 } 924 if (flg == PETSC_TRUE) { /* No break in the bs */ 925 sizes[j] = bs; 926 i+= bs; 927 } else { 928 sizes[j] = 1; 929 i++; 930 } 931 } 932 } 933 *bs_max = j; 934 PetscFunctionReturn(0); 935 } 936 937 #undef __FUNCT__ 938 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 939 int MatZeroRows_SeqBAIJ(Mat A,IS is,Scalar *diag) 940 { 941 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 942 int ierr,i,j,k,count,is_n,*is_idx,*rows; 943 int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max; 944 Scalar zero = 0.0; 945 MatScalar *aa; 946 947 PetscFunctionBegin; 948 /* Make a copy of the IS and sort it */ 949 ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr); 950 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 951 952 /* allocate memory for rows,sizes */ 953 ierr = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr); 954 sizes = rows + is_n; 955 956 /* copy IS values to rows, and sort them */ 957 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 958 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 959 if (baij->keepzeroedrows) { 960 for (i=0; i<is_n; i++) { sizes[i] = 1; } 961 bs_max = is_n; 962 } else { 963 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 964 } 965 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 966 967 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 968 row = rows[j]; 969 if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row); 970 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 971 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 972 if (sizes[i] == bs && !baij->keepzeroedrows) { 973 if (diag) { 974 if (baij->ilen[row/bs] > 0) { 975 baij->ilen[row/bs] = 1; 976 baij->j[baij->i[row/bs]] = row/bs; 977 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 978 } 979 /* Now insert all the diagonal values for this bs */ 980 for (k=0; k<bs; k++) { 981 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 982 } 983 } else { /* (!diag) */ 984 baij->ilen[row/bs] = 0; 985 } /* end (!diag) */ 986 } else { /* (sizes[i] != bs) */ 987 #if defined (PETSC_USE_DEBUG) 988 if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1"); 989 #endif 990 for (k=0; k<count; k++) { 991 aa[0] = zero; 992 aa += bs; 993 } 994 if (diag) { 995 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 996 } 997 } 998 } 999 1000 ierr = PetscFree(rows);CHKERRQ(ierr); 1001 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1002 PetscFunctionReturn(0); 1003 } 1004 1005 #undef __FUNCT__ 1006 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1007 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 1008 { 1009 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1010 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 1011 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1012 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 1013 int ridx,cidx,bs2=a->bs2,ierr; 1014 PetscTruth roworiented=a->roworiented; 1015 MatScalar *ap,value,*aa=a->a,*bap; 1016 1017 PetscFunctionBegin; 1018 for (k=0; k<m; k++) { /* loop over added rows */ 1019 row = im[k]; brow = row/bs; 1020 if (row < 0) continue; 1021 #if defined(PETSC_USE_BOPT_g) 1022 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m); 1023 #endif 1024 rp = aj + ai[brow]; 1025 ap = aa + bs2*ai[brow]; 1026 rmax = imax[brow]; 1027 nrow = ailen[brow]; 1028 low = 0; 1029 for (l=0; l<n; l++) { /* loop over added columns */ 1030 if (in[l] < 0) continue; 1031 #if defined(PETSC_USE_BOPT_g) 1032 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n); 1033 #endif 1034 col = in[l]; bcol = col/bs; 1035 ridx = row % bs; cidx = col % bs; 1036 if (roworiented) { 1037 value = v[l + k*n]; 1038 } else { 1039 value = v[k + l*m]; 1040 } 1041 if (!sorted) low = 0; high = nrow; 1042 while (high-low > 7) { 1043 t = (low+high)/2; 1044 if (rp[t] > bcol) high = t; 1045 else low = t; 1046 } 1047 for (i=low; i<high; i++) { 1048 if (rp[i] > bcol) break; 1049 if (rp[i] == bcol) { 1050 bap = ap + bs2*i + bs*cidx + ridx; 1051 if (is == ADD_VALUES) *bap += value; 1052 else *bap = value; 1053 goto noinsert1; 1054 } 1055 } 1056 if (nonew == 1) goto noinsert1; 1057 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 1058 if (nrow >= rmax) { 1059 /* there is no extra room in row, therefore enlarge */ 1060 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1061 MatScalar *new_a; 1062 1063 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 1064 1065 /* Malloc new storage space */ 1066 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1067 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1068 new_j = (int*)(new_a + bs2*new_nz); 1069 new_i = new_j + new_nz; 1070 1071 /* copy over old data into new slots */ 1072 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 1073 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1074 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 1075 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1076 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 1077 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1078 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1079 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1080 /* free up old matrix storage */ 1081 ierr = PetscFree(a->a);CHKERRQ(ierr); 1082 if (!a->singlemalloc) { 1083 ierr = PetscFree(a->i);CHKERRQ(ierr); 1084 ierr = PetscFree(a->j);CHKERRQ(ierr); 1085 } 1086 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1087 a->singlemalloc = PETSC_TRUE; 1088 1089 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1090 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1091 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 1092 a->maxnz += bs2*CHUNKSIZE; 1093 a->reallocs++; 1094 a->nz++; 1095 } 1096 N = nrow++ - 1; 1097 /* shift up all the later entries in this row */ 1098 for (ii=N; ii>=i; ii--) { 1099 rp[ii+1] = rp[ii]; 1100 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1101 } 1102 if (N>=i) { 1103 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1104 } 1105 rp[i] = bcol; 1106 ap[bs2*i + bs*cidx + ridx] = value; 1107 noinsert1:; 1108 low = i; 1109 } 1110 ailen[brow] = nrow; 1111 } 1112 PetscFunctionReturn(0); 1113 } 1114 1115 1116 #undef __FUNCT__ 1117 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1118 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info) 1119 { 1120 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1121 Mat outA; 1122 int ierr; 1123 PetscTruth row_identity,col_identity; 1124 1125 PetscFunctionBegin; 1126 if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1127 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1128 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1129 if (!row_identity || !col_identity) { 1130 SETERRQ(1,"Row and column permutations must be identity for in-place ILU"); 1131 } 1132 1133 outA = inA; 1134 inA->factor = FACTOR_LU; 1135 1136 if (!a->diag) { 1137 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1138 } 1139 /* 1140 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1141 for ILU(0) factorization with natural ordering 1142 */ 1143 if (a->bs < 8) { 1144 ierr = MatSeqBAIJ_UpdateFactor_NaturalOrdering(inA);CHKERRQ(ierr); 1145 } else { 1146 a->row = row; 1147 a->col = col; 1148 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1149 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1150 1151 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1152 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1153 PetscLogObjectParent(inA,a->icol); 1154 1155 if (!a->solve_work) { 1156 ierr = PetscMalloc((inA->m+a->bs)*sizeof(Scalar),&a->solve_work);CHKERRQ(ierr); 1157 PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(Scalar)); 1158 } 1159 } 1160 1161 ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr); 1162 1163 PetscFunctionReturn(0); 1164 } 1165 #undef __FUNCT__ 1166 #define __FUNCT__ "MatPrintHelp_SeqBAIJ" 1167 int MatPrintHelp_SeqBAIJ(Mat A) 1168 { 1169 static PetscTruth called = PETSC_FALSE; 1170 MPI_Comm comm = A->comm; 1171 int ierr; 1172 1173 PetscFunctionBegin; 1174 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1175 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1176 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1177 PetscFunctionReturn(0); 1178 } 1179 1180 EXTERN_C_BEGIN 1181 #undef __FUNCT__ 1182 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1183 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices) 1184 { 1185 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1186 int i,nz,n; 1187 1188 PetscFunctionBegin; 1189 nz = baij->maxnz; 1190 n = mat->n; 1191 for (i=0; i<nz; i++) { 1192 baij->j[i] = indices[i]; 1193 } 1194 baij->nz = nz; 1195 for (i=0; i<n; i++) { 1196 baij->ilen[i] = baij->imax[i]; 1197 } 1198 1199 PetscFunctionReturn(0); 1200 } 1201 EXTERN_C_END 1202 1203 #undef __FUNCT__ 1204 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1205 /*@ 1206 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1207 in the matrix. 1208 1209 Input Parameters: 1210 + mat - the SeqBAIJ matrix 1211 - indices - the column indices 1212 1213 Level: advanced 1214 1215 Notes: 1216 This can be called if you have precomputed the nonzero structure of the 1217 matrix and want to provide it to the matrix object to improve the performance 1218 of the MatSetValues() operation. 1219 1220 You MUST have set the correct numbers of nonzeros per row in the call to 1221 MatCreateSeqBAIJ(). 1222 1223 MUST be called before any calls to MatSetValues(); 1224 1225 @*/ 1226 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices) 1227 { 1228 int ierr,(*f)(Mat,int *); 1229 1230 PetscFunctionBegin; 1231 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1232 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)())&f);CHKERRQ(ierr); 1233 if (f) { 1234 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1235 } else { 1236 SETERRQ(1,"Wrong type of matrix to set column indices"); 1237 } 1238 PetscFunctionReturn(0); 1239 } 1240 1241 #undef __FUNCT__ 1242 #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1243 int MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1244 { 1245 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1246 int ierr,i,j,n,row,bs,*ai,*aj,mbs; 1247 PetscReal atmp; 1248 Scalar *x,zero = 0.0; 1249 MatScalar *aa; 1250 int ncols,brow,krow,kcol; 1251 1252 PetscFunctionBegin; 1253 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1254 bs = a->bs; 1255 aa = a->a; 1256 ai = a->i; 1257 aj = a->j; 1258 mbs = a->mbs; 1259 1260 ierr = VecSet(&zero,v);CHKERRQ(ierr); 1261 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1262 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1263 if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1264 for (i=0; i<mbs; i++) { 1265 ncols = ai[1] - ai[0]; ai++; 1266 brow = bs*i; 1267 for (j=0; j<ncols; j++){ 1268 /* bcol = bs*(*aj); */ 1269 for (kcol=0; kcol<bs; kcol++){ 1270 for (krow=0; krow<bs; krow++){ 1271 atmp = PetscAbsScalar(*aa); aa++; 1272 row = brow + krow; /* row index */ 1273 /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */ 1274 if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1275 } 1276 } 1277 aj++; 1278 } 1279 } 1280 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1281 PetscFunctionReturn(0); 1282 } 1283 1284 #undef __FUNCT__ 1285 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1286 int MatSetUpPreallocation_SeqBAIJ(Mat A) 1287 { 1288 int ierr; 1289 1290 PetscFunctionBegin; 1291 ierr = MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1292 PetscFunctionReturn(0); 1293 } 1294 1295 #undef __FUNCT__ 1296 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1297 int MatGetArray_SeqBAIJ(Mat A,Scalar **array) 1298 { 1299 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1300 PetscFunctionBegin; 1301 *array = a->a; 1302 PetscFunctionReturn(0); 1303 } 1304 1305 #undef __FUNCT__ 1306 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1307 int MatRestoreArray_SeqBAIJ(Mat A,Scalar **array) 1308 { 1309 PetscFunctionBegin; 1310 PetscFunctionReturn(0); 1311 } 1312 1313 /* -------------------------------------------------------------------*/ 1314 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 1315 MatGetRow_SeqBAIJ, 1316 MatRestoreRow_SeqBAIJ, 1317 MatMult_SeqBAIJ_N, 1318 MatMultAdd_SeqBAIJ_N, 1319 MatMultTranspose_SeqBAIJ, 1320 MatMultTransposeAdd_SeqBAIJ, 1321 MatSolve_SeqBAIJ_N, 1322 0, 1323 0, 1324 0, 1325 MatLUFactor_SeqBAIJ, 1326 0, 1327 0, 1328 MatTranspose_SeqBAIJ, 1329 MatGetInfo_SeqBAIJ, 1330 MatEqual_SeqBAIJ, 1331 MatGetDiagonal_SeqBAIJ, 1332 MatDiagonalScale_SeqBAIJ, 1333 MatNorm_SeqBAIJ, 1334 0, 1335 MatAssemblyEnd_SeqBAIJ, 1336 0, 1337 MatSetOption_SeqBAIJ, 1338 MatZeroEntries_SeqBAIJ, 1339 MatZeroRows_SeqBAIJ, 1340 MatLUFactorSymbolic_SeqBAIJ, 1341 MatLUFactorNumeric_SeqBAIJ_N, 1342 0, 1343 0, 1344 MatSetUpPreallocation_SeqBAIJ, 1345 0, 1346 MatGetOwnershipRange_SeqBAIJ, 1347 MatILUFactorSymbolic_SeqBAIJ, 1348 0, 1349 MatGetArray_SeqBAIJ, 1350 MatRestoreArray_SeqBAIJ, 1351 MatDuplicate_SeqBAIJ, 1352 0, 1353 0, 1354 MatILUFactor_SeqBAIJ, 1355 0, 1356 0, 1357 MatGetSubMatrices_SeqBAIJ, 1358 MatIncreaseOverlap_SeqBAIJ, 1359 MatGetValues_SeqBAIJ, 1360 0, 1361 MatPrintHelp_SeqBAIJ, 1362 MatScale_SeqBAIJ, 1363 0, 1364 0, 1365 0, 1366 MatGetBlockSize_SeqBAIJ, 1367 MatGetRowIJ_SeqBAIJ, 1368 MatRestoreRowIJ_SeqBAIJ, 1369 0, 1370 0, 1371 0, 1372 0, 1373 0, 1374 0, 1375 MatSetValuesBlocked_SeqBAIJ, 1376 MatGetSubMatrix_SeqBAIJ, 1377 MatDestroy_SeqBAIJ, 1378 MatView_SeqBAIJ, 1379 MatGetMaps_Petsc, 1380 0, 1381 0, 1382 0, 1383 0, 1384 0, 1385 0, 1386 MatGetRowMax_SeqBAIJ, 1387 MatConvert_Basic}; 1388 1389 EXTERN_C_BEGIN 1390 #undef __FUNCT__ 1391 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 1392 int MatStoreValues_SeqBAIJ(Mat mat) 1393 { 1394 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1395 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1396 int ierr; 1397 1398 PetscFunctionBegin; 1399 if (aij->nonew != 1) { 1400 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1401 } 1402 1403 /* allocate space for values if not already there */ 1404 if (!aij->saved_values) { 1405 ierr = PetscMalloc((nz+1)*sizeof(Scalar),&aij->saved_values);CHKERRQ(ierr); 1406 } 1407 1408 /* copy values over */ 1409 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(Scalar));CHKERRQ(ierr); 1410 PetscFunctionReturn(0); 1411 } 1412 EXTERN_C_END 1413 1414 EXTERN_C_BEGIN 1415 #undef __FUNCT__ 1416 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 1417 int MatRetrieveValues_SeqBAIJ(Mat mat) 1418 { 1419 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1420 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 1421 1422 PetscFunctionBegin; 1423 if (aij->nonew != 1) { 1424 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1425 } 1426 if (!aij->saved_values) { 1427 SETERRQ(1,"Must call MatStoreValues(A);first"); 1428 } 1429 1430 /* copy values over */ 1431 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(Scalar));CHKERRQ(ierr); 1432 PetscFunctionReturn(0); 1433 } 1434 EXTERN_C_END 1435 1436 EXTERN_C_BEGIN 1437 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*); 1438 EXTERN_C_END 1439 1440 EXTERN_C_BEGIN 1441 #undef __FUNCT__ 1442 #define __FUNCT__ "MatCreate_SeqBAIJ" 1443 int MatCreate_SeqBAIJ(Mat B) 1444 { 1445 int ierr,size; 1446 Mat_SeqBAIJ *b; 1447 1448 PetscFunctionBegin; 1449 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1450 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1451 1452 B->m = B->M = PetscMax(B->m,B->M); 1453 B->n = B->N = PetscMax(B->n,B->N); 1454 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 1455 B->data = (void*)b; 1456 ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 1457 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1458 B->factor = 0; 1459 B->lupivotthreshold = 1.0; 1460 B->mapping = 0; 1461 b->row = 0; 1462 b->col = 0; 1463 b->icol = 0; 1464 b->reallocs = 0; 1465 b->saved_values = 0; 1466 #if defined(PETSC_USE_MAT_SINGLE) 1467 b->setvalueslen = 0; 1468 b->setvaluescopy = PETSC_NULL; 1469 #endif 1470 b->single_precision_solves = PETSC_FALSE; 1471 1472 ierr = MapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr); 1473 ierr = MapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr); 1474 1475 b->sorted = PETSC_FALSE; 1476 b->roworiented = PETSC_TRUE; 1477 b->nonew = 0; 1478 b->diag = 0; 1479 b->solve_work = 0; 1480 b->mult_work = 0; 1481 b->spptr = 0; 1482 B->info.nz_unneeded = (PetscReal)b->maxnz; 1483 b->keepzeroedrows = PETSC_FALSE; 1484 1485 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1486 "MatStoreValues_SeqBAIJ", 1487 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 1488 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1489 "MatRetrieveValues_SeqBAIJ", 1490 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 1491 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 1492 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 1493 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 1494 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_SeqBAIJ_SeqAIJ_C", 1495 "MatConvert_SeqBAIJ_SeqAIJ", 1496 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 1497 PetscFunctionReturn(0); 1498 } 1499 EXTERN_C_END 1500 1501 #undef __FUNCT__ 1502 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 1503 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1504 { 1505 Mat C; 1506 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 1507 int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr; 1508 1509 PetscFunctionBegin; 1510 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1511 1512 *B = 0; 1513 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1514 ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr); 1515 c = (Mat_SeqBAIJ*)C->data; 1516 1517 c->bs = a->bs; 1518 c->bs2 = a->bs2; 1519 c->mbs = a->mbs; 1520 c->nbs = a->nbs; 1521 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1522 1523 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1524 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1525 for (i=0; i<mbs; i++) { 1526 c->imax[i] = a->imax[i]; 1527 c->ilen[i] = a->ilen[i]; 1528 } 1529 1530 /* allocate the matrix space */ 1531 c->singlemalloc = PETSC_TRUE; 1532 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1533 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1534 c->j = (int*)(c->a + nz*bs2); 1535 c->i = c->j + nz; 1536 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1537 if (mbs > 0) { 1538 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1539 if (cpvalues == MAT_COPY_VALUES) { 1540 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1541 } else { 1542 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1543 } 1544 } 1545 1546 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1547 c->sorted = a->sorted; 1548 c->roworiented = a->roworiented; 1549 c->nonew = a->nonew; 1550 1551 if (a->diag) { 1552 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1553 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1554 for (i=0; i<mbs; i++) { 1555 c->diag[i] = a->diag[i]; 1556 } 1557 } else c->diag = 0; 1558 c->nz = a->nz; 1559 c->maxnz = a->maxnz; 1560 c->solve_work = 0; 1561 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1562 c->mult_work = 0; 1563 C->preallocated = PETSC_TRUE; 1564 C->assembled = PETSC_TRUE; 1565 *B = C; 1566 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1567 PetscFunctionReturn(0); 1568 } 1569 1570 EXTERN_C_BEGIN 1571 #undef __FUNCT__ 1572 #define __FUNCT__ "MatLoad_SeqBAIJ" 1573 int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A) 1574 { 1575 Mat_SeqBAIJ *a; 1576 Mat B; 1577 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1578 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1579 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1580 int *masked,nmask,tmp,bs2,ishift; 1581 Scalar *aa; 1582 MPI_Comm comm = ((PetscObject)viewer)->comm; 1583 1584 PetscFunctionBegin; 1585 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1586 bs2 = bs*bs; 1587 1588 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1589 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1590 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1591 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1592 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1593 M = header[1]; N = header[2]; nz = header[3]; 1594 1595 if (header[3] < 0) { 1596 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 1597 } 1598 1599 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1600 1601 /* 1602 This code adds extra rows to make sure the number of rows is 1603 divisible by the blocksize 1604 */ 1605 mbs = M/bs; 1606 extra_rows = bs - M + bs*(mbs); 1607 if (extra_rows == bs) extra_rows = 0; 1608 else mbs++; 1609 if (extra_rows) { 1610 PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 1611 } 1612 1613 /* read in row lengths */ 1614 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1615 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1616 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1617 1618 /* read in column indices */ 1619 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1620 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1621 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1622 1623 /* loop over row lengths determining block row lengths */ 1624 ierr = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr); 1625 ierr = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1626 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1627 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1628 masked = mask + mbs; 1629 rowcount = 0; nzcount = 0; 1630 for (i=0; i<mbs; i++) { 1631 nmask = 0; 1632 for (j=0; j<bs; j++) { 1633 kmax = rowlengths[rowcount]; 1634 for (k=0; k<kmax; k++) { 1635 tmp = jj[nzcount++]/bs; 1636 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1637 } 1638 rowcount++; 1639 } 1640 browlengths[i] += nmask; 1641 /* zero out the mask elements we set */ 1642 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1643 } 1644 1645 /* create our matrix */ 1646 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr); 1647 B = *A; 1648 a = (Mat_SeqBAIJ*)B->data; 1649 1650 /* set matrix "i" values */ 1651 a->i[0] = 0; 1652 for (i=1; i<= mbs; i++) { 1653 a->i[i] = a->i[i-1] + browlengths[i-1]; 1654 a->ilen[i-1] = browlengths[i-1]; 1655 } 1656 a->nz = 0; 1657 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 1658 1659 /* read in nonzero values */ 1660 ierr = PetscMalloc((nz+extra_rows)*sizeof(Scalar),&aa);CHKERRQ(ierr); 1661 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1662 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1663 1664 /* set "a" and "j" values into matrix */ 1665 nzcount = 0; jcount = 0; 1666 for (i=0; i<mbs; i++) { 1667 nzcountb = nzcount; 1668 nmask = 0; 1669 for (j=0; j<bs; j++) { 1670 kmax = rowlengths[i*bs+j]; 1671 for (k=0; k<kmax; k++) { 1672 tmp = jj[nzcount++]/bs; 1673 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1674 } 1675 } 1676 /* sort the masked values */ 1677 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1678 1679 /* set "j" values into matrix */ 1680 maskcount = 1; 1681 for (j=0; j<nmask; j++) { 1682 a->j[jcount++] = masked[j]; 1683 mask[masked[j]] = maskcount++; 1684 } 1685 /* set "a" values into matrix */ 1686 ishift = bs2*a->i[i]; 1687 for (j=0; j<bs; j++) { 1688 kmax = rowlengths[i*bs+j]; 1689 for (k=0; k<kmax; k++) { 1690 tmp = jj[nzcountb]/bs ; 1691 block = mask[tmp] - 1; 1692 point = jj[nzcountb] - bs*tmp; 1693 idx = ishift + bs2*block + j + bs*point; 1694 a->a[idx] = (MatScalar)aa[nzcountb++]; 1695 } 1696 } 1697 /* zero out the mask elements we set */ 1698 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1699 } 1700 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1701 1702 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1703 ierr = PetscFree(browlengths);CHKERRQ(ierr); 1704 ierr = PetscFree(aa);CHKERRQ(ierr); 1705 ierr = PetscFree(jj);CHKERRQ(ierr); 1706 ierr = PetscFree(mask);CHKERRQ(ierr); 1707 1708 B->assembled = PETSC_TRUE; 1709 1710 ierr = MatView_Private(B);CHKERRQ(ierr); 1711 PetscFunctionReturn(0); 1712 } 1713 EXTERN_C_END 1714 1715 #undef __FUNCT__ 1716 #define __FUNCT__ "MatCreateSeqBAIJ" 1717 /*@C 1718 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 1719 compressed row) format. For good matrix assembly performance the 1720 user should preallocate the matrix storage by setting the parameter nz 1721 (or the array nnz). By setting these parameters accurately, performance 1722 during matrix assembly can be increased by more than a factor of 50. 1723 1724 Collective on MPI_Comm 1725 1726 Input Parameters: 1727 + comm - MPI communicator, set to PETSC_COMM_SELF 1728 . bs - size of block 1729 . m - number of rows 1730 . n - number of columns 1731 . nz - number of nonzero blocks per block row (same for all rows) 1732 - nnz - array containing the number of nonzero blocks in the various block rows 1733 (possibly different for each block row) or PETSC_NULL 1734 1735 Output Parameter: 1736 . A - the matrix 1737 1738 Options Database Keys: 1739 . -mat_no_unroll - uses code that does not unroll the loops in the 1740 block calculations (much slower) 1741 . -mat_block_size - size of the blocks to use 1742 1743 Level: intermediate 1744 1745 Notes: 1746 A nonzero block is any block that as 1 or more nonzeros in it 1747 1748 The block AIJ format is fully compatible with standard Fortran 77 1749 storage. That is, the stored row and column indices can begin at 1750 either one (as in Fortran) or zero. See the users' manual for details. 1751 1752 Specify the preallocated storage with either nz or nnz (not both). 1753 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1754 allocation. For additional details, see the users manual chapter on 1755 matrices. 1756 1757 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1758 @*/ 1759 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A) 1760 { 1761 int ierr; 1762 1763 PetscFunctionBegin; 1764 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1765 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 1766 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1767 PetscFunctionReturn(0); 1768 } 1769 1770 #undef __FUNCT__ 1771 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 1772 /*@C 1773 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 1774 per row in the matrix. For good matrix assembly performance the 1775 user should preallocate the matrix storage by setting the parameter nz 1776 (or the array nnz). By setting these parameters accurately, performance 1777 during matrix assembly can be increased by more than a factor of 50. 1778 1779 Collective on MPI_Comm 1780 1781 Input Parameters: 1782 + A - the matrix 1783 . bs - size of block 1784 . nz - number of block nonzeros per block row (same for all rows) 1785 - nnz - array containing the number of block nonzeros in the various block rows 1786 (possibly different for each block row) or PETSC_NULL 1787 1788 Options Database Keys: 1789 . -mat_no_unroll - uses code that does not unroll the loops in the 1790 block calculations (much slower) 1791 . -mat_block_size - size of the blocks to use 1792 1793 Level: intermediate 1794 1795 Notes: 1796 The block AIJ format is fully compatible with standard Fortran 77 1797 storage. That is, the stored row and column indices can begin at 1798 either one (as in Fortran) or zero. See the users' manual for details. 1799 1800 Specify the preallocated storage with either nz or nnz (not both). 1801 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1802 allocation. For additional details, see the users manual chapter on 1803 matrices. 1804 1805 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1806 @*/ 1807 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz) 1808 { 1809 Mat_SeqBAIJ *b; 1810 int i,len,ierr,mbs,nbs,bs2,newbs = bs; 1811 PetscTruth flg; 1812 1813 PetscFunctionBegin; 1814 ierr = PetscTypeCompare((PetscObject)B,MATSEQBAIJ,&flg);CHKERRQ(ierr); 1815 if (!flg) PetscFunctionReturn(0); 1816 1817 B->preallocated = PETSC_TRUE; 1818 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr); 1819 if (nnz && newbs != bs) { 1820 SETERRQ(1,"Cannot change blocksize from command line if setting nnz"); 1821 } 1822 bs = newbs; 1823 1824 mbs = B->m/bs; 1825 nbs = B->n/bs; 1826 bs2 = bs*bs; 1827 1828 if (mbs*bs!=B->m || nbs*bs!=B->n) { 1829 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs); 1830 } 1831 1832 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1833 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1834 if (nnz) { 1835 for (i=0; i<mbs; i++) { 1836 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1837 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs); 1838 } 1839 } 1840 1841 b = (Mat_SeqBAIJ*)B->data; 1842 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1843 B->ops->solve = MatSolve_SeqBAIJ_Update; 1844 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 1845 if (!flg) { 1846 switch (bs) { 1847 case 1: 1848 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1849 B->ops->mult = MatMult_SeqBAIJ_1; 1850 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 1851 break; 1852 case 2: 1853 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 1854 B->ops->mult = MatMult_SeqBAIJ_2; 1855 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 1856 break; 1857 case 3: 1858 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 1859 B->ops->mult = MatMult_SeqBAIJ_3; 1860 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 1861 break; 1862 case 4: 1863 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 1864 B->ops->mult = MatMult_SeqBAIJ_4; 1865 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 1866 break; 1867 case 5: 1868 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 1869 B->ops->mult = MatMult_SeqBAIJ_5; 1870 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 1871 break; 1872 case 6: 1873 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 1874 B->ops->mult = MatMult_SeqBAIJ_6; 1875 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 1876 break; 1877 case 7: 1878 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 1879 B->ops->mult = MatMult_SeqBAIJ_7; 1880 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 1881 break; 1882 default: 1883 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 1884 B->ops->mult = MatMult_SeqBAIJ_N; 1885 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 1886 break; 1887 } 1888 } 1889 b->bs = bs; 1890 b->mbs = mbs; 1891 b->nbs = nbs; 1892 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 1893 if (!nnz) { 1894 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1895 else if (nz <= 0) nz = 1; 1896 for (i=0; i<mbs; i++) b->imax[i] = nz; 1897 nz = nz*mbs; 1898 } else { 1899 nz = 0; 1900 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1901 } 1902 1903 /* allocate the matrix space */ 1904 len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 1905 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 1906 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1907 b->j = (int*)(b->a + nz*bs2); 1908 ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 1909 b->i = b->j + nz; 1910 b->singlemalloc = PETSC_TRUE; 1911 1912 b->i[0] = 0; 1913 for (i=1; i<mbs+1; i++) { 1914 b->i[i] = b->i[i-1] + b->imax[i-1]; 1915 } 1916 1917 /* b->ilen will count nonzeros in each block row so far. */ 1918 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 1919 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1920 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1921 1922 b->bs = bs; 1923 b->bs2 = bs2; 1924 b->mbs = mbs; 1925 b->nz = 0; 1926 b->maxnz = nz*bs2; 1927 B->info.nz_unneeded = (PetscReal)b->maxnz; 1928 PetscFunctionReturn(0); 1929 } 1930 1931