1 /*$Id: baij.c,v 1.190 1999/11/19 20:35:39 bsmith Exp balay $*/ 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "sys.h" 8 #include "src/mat/impls/baij/seq/baij.h" 9 #include "src/vec/vecimpl.h" 10 #include "src/inline/spops.h" 11 12 #define CHUNKSIZE 10 13 14 /* 15 Checks for missing diagonals 16 */ 17 #undef __FUNC__ 18 #define __FUNC__ "MatMissingDiagonal_SeqBAIJ" 19 int MatMissingDiagonal_SeqBAIJ(Mat A) 20 { 21 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 22 int *diag = a->diag, *jj = a->j,i,ierr; 23 24 PetscFunctionBegin; 25 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 26 for ( i=0; i<a->mbs; i++ ) { 27 if (jj[diag[i]] != i) { 28 SETERRQ1(1,1,"Matrix is missing diagonal number %d",i); 29 } 30 } 31 PetscFunctionReturn(0); 32 } 33 34 #undef __FUNC__ 35 #define __FUNC__ "MatMarkDiagonal_SeqBAIJ" 36 int MatMarkDiagonal_SeqBAIJ(Mat A) 37 { 38 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 39 int i,j, *diag, m = a->mbs; 40 41 PetscFunctionBegin; 42 diag = (int *) PetscMalloc( (m+1)*sizeof(int));CHKPTRQ(diag); 43 PLogObjectMemory(A,(m+1)*sizeof(int)); 44 for ( i=0; i<m; i++ ) { 45 diag[i] = a->i[i+1]; 46 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 47 if (a->j[j] == i) { 48 diag[i] = j; 49 break; 50 } 51 } 52 } 53 a->diag = diag; 54 PetscFunctionReturn(0); 55 } 56 57 58 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 59 60 #undef __FUNC__ 61 #define __FUNC__ "MatGetRowIJ_SeqBAIJ" 62 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 63 PetscTruth *done) 64 { 65 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 66 int ierr, n = a->mbs,i; 67 68 PetscFunctionBegin; 69 *nn = n; 70 if (!ia) PetscFunctionReturn(0); 71 if (symmetric) { 72 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 73 } else if (oshift == 1) { 74 /* temporarily add 1 to i and j indices */ 75 int nz = a->i[n] + 1; 76 for ( i=0; i<nz; i++ ) a->j[i]++; 77 for ( i=0; i<n+1; i++ ) a->i[i]++; 78 *ia = a->i; *ja = a->j; 79 } else { 80 *ia = a->i; *ja = a->j; 81 } 82 83 PetscFunctionReturn(0); 84 } 85 86 #undef __FUNC__ 87 #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ" 88 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 89 PetscTruth *done) 90 { 91 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 92 int i,n = a->mbs,ierr; 93 94 PetscFunctionBegin; 95 if (!ia) PetscFunctionReturn(0); 96 if (symmetric) { 97 ierr = PetscFree(*ia);CHKERRQ(ierr); 98 ierr = PetscFree(*ja);CHKERRQ(ierr); 99 } else if (oshift == 1) { 100 int nz = a->i[n]; 101 for ( i=0; i<nz; i++ ) a->j[i]--; 102 for ( i=0; i<n+1; i++ ) a->i[i]--; 103 } 104 PetscFunctionReturn(0); 105 } 106 107 #undef __FUNC__ 108 #define __FUNC__ "MatGetBlockSize_SeqBAIJ" 109 int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs) 110 { 111 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data; 112 113 PetscFunctionBegin; 114 *bs = baij->bs; 115 PetscFunctionReturn(0); 116 } 117 118 119 #undef __FUNC__ 120 #define __FUNC__ "MatDestroy_SeqBAIJ" 121 int MatDestroy_SeqBAIJ(Mat A) 122 { 123 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 124 int ierr; 125 126 PetscFunctionBegin; 127 if (A->mapping) { 128 ierr = ISLocalToGlobalMappingDestroy(A->mapping);CHKERRQ(ierr); 129 } 130 if (A->bmapping) { 131 ierr = ISLocalToGlobalMappingDestroy(A->bmapping);CHKERRQ(ierr); 132 } 133 if (A->rmap) { 134 ierr = MapDestroy(A->rmap);CHKERRQ(ierr); 135 } 136 if (A->cmap) { 137 ierr = MapDestroy(A->cmap);CHKERRQ(ierr); 138 } 139 #if defined(PETSC_USE_LOG) 140 PLogObjectState((PetscObject) A,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 141 #endif 142 ierr = PetscFree(a->a);CHKERRQ(ierr); 143 if (!a->singlemalloc) { 144 ierr = PetscFree(a->i);CHKERRQ(ierr); 145 ierr = PetscFree(a->j);CHKERRQ(ierr); 146 } 147 if (a->row) { 148 ierr = ISDestroy(a->row);CHKERRQ(ierr); 149 } 150 if (a->col) { 151 ierr = ISDestroy(a->col);CHKERRQ(ierr); 152 } 153 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 154 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 155 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 156 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 157 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 158 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 159 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 160 ierr = PetscFree(a);CHKERRQ(ierr); 161 PLogObjectDestroy(A); 162 PetscHeaderDestroy(A); 163 PetscFunctionReturn(0); 164 } 165 166 #undef __FUNC__ 167 #define __FUNC__ "MatSetOption_SeqBAIJ" 168 int MatSetOption_SeqBAIJ(Mat A,MatOption op) 169 { 170 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 171 172 PetscFunctionBegin; 173 if (op == MAT_ROW_ORIENTED) a->roworiented = PETSC_TRUE; 174 else if (op == MAT_COLUMN_ORIENTED) a->roworiented = PETSC_FALSE; 175 else if (op == MAT_COLUMNS_SORTED) a->sorted = PETSC_TRUE; 176 else if (op == MAT_COLUMNS_UNSORTED) a->sorted = PETSC_FALSE; 177 else if (op == MAT_KEEP_ZEROED_ROWS) a->keepzeroedrows = PETSC_TRUE; 178 else if (op == MAT_NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 179 else if (op == MAT_NEW_NONZERO_LOCATION_ERR) a->nonew = -1; 180 else if (op == MAT_NEW_NONZERO_ALLOCATION_ERR) a->nonew = -2; 181 else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 182 else if (op == MAT_ROWS_SORTED || 183 op == MAT_ROWS_UNSORTED || 184 op == MAT_SYMMETRIC || 185 op == MAT_STRUCTURALLY_SYMMETRIC || 186 op == MAT_YES_NEW_DIAGONALS || 187 op == MAT_IGNORE_OFF_PROC_ENTRIES || 188 op == MAT_USE_HASH_TABLE) { 189 PLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n"); 190 } else if (op == MAT_NO_NEW_DIAGONALS) { 191 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 192 } else { 193 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 194 } 195 PetscFunctionReturn(0); 196 } 197 198 199 #undef __FUNC__ 200 #define __FUNC__ "MatGetSize_SeqBAIJ" 201 int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 202 { 203 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 204 205 PetscFunctionBegin; 206 if (m) *m = a->m; 207 if (n) *n = a->n; 208 PetscFunctionReturn(0); 209 } 210 211 #undef __FUNC__ 212 #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ" 213 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 214 { 215 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 216 217 PetscFunctionBegin; 218 *m = 0; *n = a->m; 219 PetscFunctionReturn(0); 220 } 221 222 #undef __FUNC__ 223 #define __FUNC__ "MatGetRow_SeqBAIJ" 224 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 225 { 226 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 227 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 228 MatScalar *aa,*aa_i; 229 Scalar *v_i; 230 231 PetscFunctionBegin; 232 bs = a->bs; 233 ai = a->i; 234 aj = a->j; 235 aa = a->a; 236 bs2 = a->bs2; 237 238 if (row < 0 || row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row out of range"); 239 240 bn = row/bs; /* Block number */ 241 bp = row % bs; /* Block Position */ 242 M = ai[bn+1] - ai[bn]; 243 *nz = bs*M; 244 245 if (v) { 246 *v = 0; 247 if (*nz) { 248 *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) );CHKPTRQ(*v); 249 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 250 v_i = *v + i*bs; 251 aa_i = aa + bs2*(ai[bn] + i); 252 for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];} 253 } 254 } 255 } 256 257 if (idx) { 258 *idx = 0; 259 if (*nz) { 260 *idx = (int *) PetscMalloc( (*nz)*sizeof(int) );CHKPTRQ(*idx); 261 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 262 idx_i = *idx + i*bs; 263 itmp = bs*aj[ai[bn] + i]; 264 for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;} 265 } 266 } 267 } 268 PetscFunctionReturn(0); 269 } 270 271 #undef __FUNC__ 272 #define __FUNC__ "MatRestoreRow_SeqBAIJ" 273 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 274 { 275 int ierr; 276 277 PetscFunctionBegin; 278 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 279 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 280 PetscFunctionReturn(0); 281 } 282 283 #undef __FUNC__ 284 #define __FUNC__ "MatTranspose_SeqBAIJ" 285 int MatTranspose_SeqBAIJ(Mat A,Mat *B) 286 { 287 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 288 Mat C; 289 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 290 int *rows,*cols,bs2=a->bs2; 291 MatScalar *array = a->a; 292 293 PetscFunctionBegin; 294 if (B==PETSC_NULL && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Square matrix only for in-place"); 295 col = (int *) PetscMalloc((1+nbs)*sizeof(int));CHKPTRQ(col); 296 ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr); 297 298 for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1; 299 ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C);CHKERRQ(ierr); 300 ierr = PetscFree(col);CHKERRQ(ierr); 301 rows = (int *) PetscMalloc(2*bs*sizeof(int));CHKPTRQ(rows); 302 cols = rows + bs; 303 for ( i=0; i<mbs; i++ ) { 304 cols[0] = i*bs; 305 for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1; 306 len = ai[i+1] - ai[i]; 307 for ( j=0; j<len; j++ ) { 308 rows[0] = (*aj++)*bs; 309 for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1; 310 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 311 array += bs2; 312 } 313 } 314 ierr = PetscFree(rows);CHKERRQ(ierr); 315 316 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 317 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 318 319 if (B) { 320 *B = C; 321 } else { 322 PetscOps *Abops; 323 MatOps Aops; 324 325 /* This isn't really an in-place transpose */ 326 ierr = PetscFree(a->a);CHKERRQ(ierr); 327 if (!a->singlemalloc) { 328 ierr = PetscFree(a->i);CHKERRQ(ierr); 329 ierr = PetscFree(a->j);CHKERRQ(ierr); 330 } 331 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 332 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 333 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 334 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 335 ierr = PetscFree(a);CHKERRQ(ierr); 336 337 338 ierr = MapDestroy(A->rmap);CHKERRQ(ierr); 339 ierr = MapDestroy(A->cmap);CHKERRQ(ierr); 340 341 /* 342 This is horrible, horrible code. We need to keep the 343 A pointers for the bops and ops but copy everything 344 else from C. 345 */ 346 Abops = A->bops; 347 Aops = A->ops; 348 ierr = PetscMemcpy(A,C,sizeof(struct _p_Mat));CHKERRQ(ierr); 349 A->bops = Abops; 350 A->ops = Aops; 351 A->qlist = 0; 352 353 PetscHeaderDestroy(C); 354 } 355 PetscFunctionReturn(0); 356 } 357 358 #undef __FUNC__ 359 #define __FUNC__ "MatView_SeqBAIJ_Binary" 360 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 361 { 362 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 363 int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 364 Scalar *aa; 365 FILE *file; 366 367 PetscFunctionBegin; 368 ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 369 col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 370 col_lens[0] = MAT_COOKIE; 371 372 col_lens[1] = a->m; 373 col_lens[2] = a->n; 374 col_lens[3] = a->nz*bs2; 375 376 /* store lengths of each row and write (including header) to file */ 377 count = 0; 378 for ( i=0; i<a->mbs; i++ ) { 379 for ( j=0; j<bs; j++ ) { 380 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 381 } 382 } 383 ierr = PetscBinaryWrite(fd,col_lens,4+a->m,PETSC_INT,1);CHKERRQ(ierr); 384 ierr = PetscFree(col_lens);CHKERRQ(ierr); 385 386 /* store column indices (zero start index) */ 387 jj = (int *) PetscMalloc( (a->nz+1)*bs2*sizeof(int) );CHKPTRQ(jj); 388 count = 0; 389 for ( i=0; i<a->mbs; i++ ) { 390 for ( j=0; j<bs; j++ ) { 391 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 392 for ( l=0; l<bs; l++ ) { 393 jj[count++] = bs*a->j[k] + l; 394 } 395 } 396 } 397 } 398 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr); 399 ierr = PetscFree(jj);CHKERRQ(ierr); 400 401 /* store nonzero values */ 402 aa = (Scalar *) PetscMalloc((a->nz+1)*bs2*sizeof(Scalar));CHKPTRQ(aa); 403 count = 0; 404 for ( i=0; i<a->mbs; i++ ) { 405 for ( j=0; j<bs; j++ ) { 406 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 407 for ( l=0; l<bs; l++ ) { 408 aa[count++] = a->a[bs2*k + l*bs + j]; 409 } 410 } 411 } 412 } 413 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr); 414 ierr = PetscFree(aa);CHKERRQ(ierr); 415 416 ierr = ViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 417 if (file) { 418 fprintf(file,"-matload_block_size %d\n",a->bs); 419 } 420 PetscFunctionReturn(0); 421 } 422 423 #undef __FUNC__ 424 #define __FUNC__ "MatView_SeqBAIJ_ASCII" 425 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 426 { 427 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 428 int ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2; 429 char *outputname; 430 431 PetscFunctionBegin; 432 ierr = ViewerGetOutputname(viewer,&outputname);CHKERRQ(ierr); 433 ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr); 434 if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) { 435 ierr = ViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 436 } else if (format == VIEWER_FORMAT_ASCII_MATLAB) { 437 SETERRQ(PETSC_ERR_SUP,0,"Matlab format not supported"); 438 } else if (format == VIEWER_FORMAT_ASCII_COMMON) { 439 ierr = ViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 440 for ( i=0; i<a->mbs; i++ ) { 441 for ( j=0; j<bs; j++ ) { 442 ierr = ViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 443 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 444 for ( l=0; l<bs; l++ ) { 445 #if defined(PETSC_USE_COMPLEX) 446 if (PetscImaginary(a->a[bs2*k + l*bs + j]) > 0.0 && PetscReal(a->a[bs2*k + l*bs + j]) != 0.0) { 447 ierr = ViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l, 448 PetscReal(a->a[bs2*k + l*bs + j]),PetscImaginary(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 449 } else if (PetscImaginary(a->a[bs2*k + l*bs + j]) < 0.0 && PetscReal(a->a[bs2*k + l*bs + j]) != 0.0) { 450 ierr = ViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l, 451 PetscReal(a->a[bs2*k + l*bs + j]),-PetscImaginary(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 452 } else if (PetscReal(a->a[bs2*k + l*bs + j]) != 0.0) { 453 ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscReal(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 454 } 455 #else 456 if (a->a[bs2*k + l*bs + j] != 0.0) { 457 ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 458 } 459 #endif 460 } 461 } 462 ierr = ViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 463 } 464 } 465 ierr = ViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 466 } else { 467 ierr = ViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 468 for ( i=0; i<a->mbs; i++ ) { 469 for ( j=0; j<bs; j++ ) { 470 ierr = ViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 471 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 472 for ( l=0; l<bs; l++ ) { 473 #if defined(PETSC_USE_COMPLEX) 474 if (PetscImaginary(a->a[bs2*k + l*bs + j]) > 0.0) { 475 ierr = ViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l, 476 PetscReal(a->a[bs2*k + l*bs + j]),PetscImaginary(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 477 } else if (PetscImaginary(a->a[bs2*k + l*bs + j]) < 0.0) { 478 ierr = ViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l, 479 PetscReal(a->a[bs2*k + l*bs + j]),-PetscImaginary(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 480 } else { 481 ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscReal(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 482 } 483 #else 484 ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 485 #endif 486 } 487 } 488 ierr = ViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 489 } 490 } 491 ierr = ViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 492 } 493 ierr = ViewerFlush(viewer);CHKERRQ(ierr); 494 PetscFunctionReturn(0); 495 } 496 497 #undef __FUNC__ 498 #define __FUNC__ "MatView_SeqBAIJ_Draw_Zoom" 499 static int MatView_SeqBAIJ_Draw_Zoom(Draw draw,void *Aa) 500 { 501 Mat A = (Mat) Aa; 502 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *) A->data; 503 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank; 504 double xl,yl,xr,yr,x_l,x_r,y_l,y_r; 505 MatScalar *aa; 506 MPI_Comm comm; 507 Viewer viewer; 508 509 PetscFunctionBegin; 510 /* 511 This is nasty. If this is called from an originally parallel matrix 512 then all processes call this, but only the first has the matrix so the 513 rest should return immediately. 514 */ 515 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 516 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 517 if (rank) PetscFunctionReturn(0); 518 519 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*) &viewer);CHKERRQ(ierr); 520 521 ierr = DrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 522 523 /* loop over matrix elements drawing boxes */ 524 color = DRAW_BLUE; 525 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 526 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 527 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 528 x_l = a->j[j]*bs; x_r = x_l + 1.0; 529 aa = a->a + j*bs2; 530 for ( k=0; k<bs; k++ ) { 531 for ( l=0; l<bs; l++ ) { 532 if (PetscReal(*aa++) >= 0.) continue; 533 ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 534 } 535 } 536 } 537 } 538 color = DRAW_CYAN; 539 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 540 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 541 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 542 x_l = a->j[j]*bs; x_r = x_l + 1.0; 543 aa = a->a + j*bs2; 544 for ( k=0; k<bs; k++ ) { 545 for ( l=0; l<bs; l++ ) { 546 if (PetscReal(*aa++) != 0.) continue; 547 ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 548 } 549 } 550 } 551 } 552 553 color = DRAW_RED; 554 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 555 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 556 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 557 x_l = a->j[j]*bs; x_r = x_l + 1.0; 558 aa = a->a + j*bs2; 559 for ( k=0; k<bs; k++ ) { 560 for ( l=0; l<bs; l++ ) { 561 if (PetscReal(*aa++) <= 0.) continue; 562 ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 563 } 564 } 565 } 566 } 567 PetscFunctionReturn(0); 568 } 569 570 #undef __FUNC__ 571 #define __FUNC__ "MatView_SeqBAIJ_Draw" 572 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer) 573 { 574 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *) A->data; 575 int ierr; 576 double xl,yl,xr,yr,w,h; 577 Draw draw; 578 PetscTruth isnull; 579 580 PetscFunctionBegin; 581 582 ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 583 ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 584 585 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 586 xr = a->n; yr = a->m; h = yr/10.0; w = xr/10.0; 587 xr += w; yr += h; xl = -w; yl = -h; 588 ierr = DrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 589 ierr = DrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 590 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 591 PetscFunctionReturn(0); 592 } 593 594 #undef __FUNC__ 595 #define __FUNC__ "MatView_SeqBAIJ" 596 int MatView_SeqBAIJ(Mat A,Viewer viewer) 597 { 598 int ierr; 599 PetscTruth issocket,isascii,isbinary,isdraw; 600 601 PetscFunctionBegin; 602 ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr); 603 ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr); 604 ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr); 605 ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr); 606 if (issocket) { 607 SETERRQ(PETSC_ERR_SUP,0,"Socket viewer not supported"); 608 } else if (isascii){ 609 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 610 } else if (isbinary) { 611 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 612 } else if (isdraw) { 613 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 614 } else { 615 SETERRQ1(1,1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name); 616 } 617 PetscFunctionReturn(0); 618 } 619 620 621 #undef __FUNC__ 622 #define __FUNC__ "MatGetValues_SeqBAIJ" 623 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 624 { 625 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 626 int *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 627 int *ai = a->i, *ailen = a->ilen; 628 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 629 MatScalar *ap, *aa = a->a, zero = 0.0; 630 631 PetscFunctionBegin; 632 for ( k=0; k<m; k++ ) { /* loop over rows */ 633 row = im[k]; brow = row/bs; 634 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 635 if (row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 636 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 637 nrow = ailen[brow]; 638 for ( l=0; l<n; l++ ) { /* loop over columns */ 639 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 640 if (in[l] >= a->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 641 col = in[l] ; 642 bcol = col/bs; 643 cidx = col%bs; 644 ridx = row%bs; 645 high = nrow; 646 low = 0; /* assume unsorted */ 647 while (high-low > 5) { 648 t = (low+high)/2; 649 if (rp[t] > bcol) high = t; 650 else low = t; 651 } 652 for ( i=low; i<high; i++ ) { 653 if (rp[i] > bcol) break; 654 if (rp[i] == bcol) { 655 *v++ = ap[bs2*i+bs*cidx+ridx]; 656 goto finished; 657 } 658 } 659 *v++ = zero; 660 finished:; 661 } 662 } 663 PetscFunctionReturn(0); 664 } 665 666 667 #undef __FUNC__ 668 #define __FUNC__ "MatSetValuesBlocked_SeqBAIJ" 669 int MatSetValuesBlocked_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 670 { 671 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 672 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 673 int *imax=a->imax,*ai=a->i,*ailen=a->ilen, roworiented=a->roworiented; 674 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 675 Scalar *value = v; 676 MatScalar *ap,*aa=a->a,*bap; 677 678 PetscFunctionBegin; 679 if (roworiented) { 680 stepval = (n-1)*bs; 681 } else { 682 stepval = (m-1)*bs; 683 } 684 for ( k=0; k<m; k++ ) { /* loop over added rows */ 685 row = im[k]; 686 if (row < 0) continue; 687 #if defined(PETSC_USE_BOPT_g) 688 if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 689 #endif 690 rp = aj + ai[row]; 691 ap = aa + bs2*ai[row]; 692 rmax = imax[row]; 693 nrow = ailen[row]; 694 low = 0; 695 for ( l=0; l<n; l++ ) { /* loop over added columns */ 696 if (in[l] < 0) continue; 697 #if defined(PETSC_USE_BOPT_g) 698 if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 699 #endif 700 col = in[l]; 701 if (roworiented) { 702 value = v + k*(stepval+bs)*bs + l*bs; 703 } else { 704 value = v + l*(stepval+bs)*bs + k*bs; 705 } 706 if (!sorted) low = 0; high = nrow; 707 while (high-low > 7) { 708 t = (low+high)/2; 709 if (rp[t] > col) high = t; 710 else low = t; 711 } 712 for ( i=low; i<high; i++ ) { 713 if (rp[i] > col) break; 714 if (rp[i] == col) { 715 bap = ap + bs2*i; 716 if (roworiented) { 717 if (is == ADD_VALUES) { 718 for ( ii=0; ii<bs; ii++,value+=stepval ) { 719 for (jj=ii; jj<bs2; jj+=bs ) { 720 bap[jj] += *value++; 721 } 722 } 723 } else { 724 for ( ii=0; ii<bs; ii++,value+=stepval ) { 725 for (jj=ii; jj<bs2; jj+=bs ) { 726 bap[jj] = *value++; 727 } 728 } 729 } 730 } else { 731 if (is == ADD_VALUES) { 732 for ( ii=0; ii<bs; ii++,value+=stepval ) { 733 for (jj=0; jj<bs; jj++ ) { 734 *bap++ += *value++; 735 } 736 } 737 } else { 738 for ( ii=0; ii<bs; ii++,value+=stepval ) { 739 for (jj=0; jj<bs; jj++ ) { 740 *bap++ = *value++; 741 } 742 } 743 } 744 } 745 goto noinsert2; 746 } 747 } 748 if (nonew == 1) goto noinsert2; 749 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); 750 if (nrow >= rmax) { 751 /* there is no extra room in row, therefore enlarge */ 752 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 753 MatScalar *new_a; 754 755 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); 756 757 /* malloc new storage space */ 758 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 759 new_a = (MatScalar *) PetscMalloc( len );CHKPTRQ(new_a); 760 new_j = (int *) (new_a + bs2*new_nz); 761 new_i = new_j + new_nz; 762 763 /* copy over old data into new slots */ 764 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} 765 for ( ii=row+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 766 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 767 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 768 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 769 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 770 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 771 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 772 /* free up old matrix storage */ 773 ierr = PetscFree(a->a);CHKERRQ(ierr); 774 if (!a->singlemalloc) { 775 ierr = PetscFree(a->i);CHKERRQ(ierr); 776 ierr = PetscFree(a->j);CHKERRQ(ierr); 777 } 778 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 779 a->singlemalloc = PETSC_TRUE; 780 781 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 782 rmax = imax[row] = imax[row] + CHUNKSIZE; 783 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 784 a->maxnz += bs2*CHUNKSIZE; 785 a->reallocs++; 786 a->nz++; 787 } 788 N = nrow++ - 1; 789 /* shift up all the later entries in this row */ 790 for ( ii=N; ii>=i; ii-- ) { 791 rp[ii+1] = rp[ii]; 792 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 793 } 794 if (N >= i) { 795 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 796 } 797 rp[i] = col; 798 bap = ap + bs2*i; 799 if (roworiented) { 800 for ( ii=0; ii<bs; ii++,value+=stepval) { 801 for (jj=ii; jj<bs2; jj+=bs ) { 802 bap[jj] = *value++; 803 } 804 } 805 } else { 806 for ( ii=0; ii<bs; ii++,value+=stepval ) { 807 for (jj=0; jj<bs; jj++ ) { 808 *bap++ = *value++; 809 } 810 } 811 } 812 noinsert2:; 813 low = i; 814 } 815 ailen[row] = nrow; 816 } 817 PetscFunctionReturn(0); 818 } 819 820 821 #undef __FUNC__ 822 #define __FUNC__ "MatAssemblyEnd_SeqBAIJ" 823 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 824 { 825 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 826 int fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax; 827 int m = a->m,*ip, N, *ailen = a->ilen; 828 int mbs = a->mbs, bs2 = a->bs2,rmax = 0,ierr; 829 MatScalar *aa = a->a, *ap; 830 831 PetscFunctionBegin; 832 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 833 834 if (m) rmax = ailen[0]; 835 for ( i=1; i<mbs; i++ ) { 836 /* move each row back by the amount of empty slots (fshift) before it*/ 837 fshift += imax[i-1] - ailen[i-1]; 838 rmax = PetscMax(rmax,ailen[i]); 839 if (fshift) { 840 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 841 N = ailen[i]; 842 for ( j=0; j<N; j++ ) { 843 ip[j-fshift] = ip[j]; 844 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 845 } 846 } 847 ai[i] = ai[i-1] + ailen[i-1]; 848 } 849 if (mbs) { 850 fshift += imax[mbs-1] - ailen[mbs-1]; 851 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 852 } 853 /* reset ilen and imax for each row */ 854 for ( i=0; i<mbs; i++ ) { 855 ailen[i] = imax[i] = ai[i+1] - ai[i]; 856 } 857 a->nz = ai[mbs]; 858 859 /* diagonals may have moved, so kill the diagonal pointers */ 860 if (fshift && a->diag) { 861 ierr = PetscFree(a->diag);CHKERRQ(ierr); 862 PLogObjectMemory(A,-(m+1)*sizeof(int)); 863 a->diag = 0; 864 } 865 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 866 m,a->n,a->bs,fshift*bs2,a->nz*bs2); 867 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n", 868 a->reallocs); 869 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 870 a->reallocs = 0; 871 A->info.nz_unneeded = (double)fshift*bs2; 872 873 PetscFunctionReturn(0); 874 } 875 876 877 878 /* 879 This function returns an array of flags which indicate the locations of contiguous 880 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 881 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 882 Assume: sizes should be long enough to hold all the values. 883 */ 884 #undef __FUNC__ 885 #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Blocks" 886 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 887 { 888 int i,j,k,row; 889 PetscTruth flg; 890 891 PetscFunctionBegin; 892 for ( i=0,j=0; i<n; j++ ) { 893 row = idx[i]; 894 if (row%bs!=0) { /* Not the begining of a block */ 895 sizes[j] = 1; 896 i++; 897 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 898 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 899 i++; 900 } else { /* Begining of the block, so check if the complete block exists */ 901 flg = PETSC_TRUE; 902 for ( k=1; k<bs; k++ ) { 903 if (row+k != idx[i+k]) { /* break in the block */ 904 flg = PETSC_FALSE; 905 break; 906 } 907 } 908 if (flg == PETSC_TRUE) { /* No break in the bs */ 909 sizes[j] = bs; 910 i+= bs; 911 } else { 912 sizes[j] = 1; 913 i++; 914 } 915 } 916 } 917 *bs_max = j; 918 PetscFunctionReturn(0); 919 } 920 921 #undef __FUNC__ 922 #define __FUNC__ "MatZeroRows_SeqBAIJ" 923 int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag) 924 { 925 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 926 int ierr,i,j,k,count,is_n,*is_idx,*rows; 927 int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max; 928 Scalar zero = 0.0; 929 MatScalar *aa; 930 931 PetscFunctionBegin; 932 /* Make a copy of the IS and sort it */ 933 ierr = ISGetSize(is,&is_n);CHKERRQ(ierr); 934 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 935 936 /* allocate memory for rows,sizes */ 937 rows = (int*)PetscMalloc((3*is_n+1)*sizeof(int));CHKPTRQ(rows); 938 sizes = rows + is_n; 939 940 /* initialize copy IS valurs to rows, and sort them */ 941 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 942 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 943 if (baij->keepzeroedrows) { 944 for ( i=0; i<is_n; i++ ) { sizes[i] = 1; } 945 bs_max = is_n; 946 } else { 947 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 948 } 949 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 950 951 for ( i=0,j=0; i<bs_max; j+=sizes[i],i++ ) { 952 row = rows[j]; 953 if (row < 0 || row > baij->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"row %d out of range",row); 954 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 955 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 956 if (sizes[i] == bs && !baij->keepzeroedrows) { 957 if (diag) { 958 if (baij->ilen[row/bs] > 0) { 959 baij->ilen[row/bs] = 1; 960 baij->j[baij->i[row/bs]] = row/bs; 961 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 962 } 963 /* Now insert all the diagoanl values for this bs */ 964 for ( k=0; k<bs; k++ ) { 965 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 966 } 967 } else { /* (!diag) */ 968 baij->ilen[row/bs] = 0; 969 } /* end (!diag) */ 970 } else { /* (sizes[i] != bs) */ 971 #if defined (PETSC_USE_DEBUG) 972 if (sizes[i] != 1) SETERRQ(1,0,"Internal Error. Value should be 1"); 973 #endif 974 for ( k=0; k<count; k++ ) { 975 aa[0] = zero; 976 aa+=bs; 977 } 978 if (diag) { 979 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 980 } 981 } 982 } 983 984 ierr = PetscFree(rows);CHKERRQ(ierr); 985 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 986 PetscFunctionReturn(0); 987 } 988 989 #undef __FUNC__ 990 #define __FUNC__ "MatSetValues_SeqBAIJ" 991 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 992 { 993 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 994 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 995 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 996 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 997 int ridx,cidx,bs2=a->bs2,ierr; 998 MatScalar *ap,value,*aa=a->a,*bap; 999 1000 PetscFunctionBegin; 1001 for ( k=0; k<m; k++ ) { /* loop over added rows */ 1002 row = im[k]; brow = row/bs; 1003 if (row < 0) continue; 1004 #if defined(PETSC_USE_BOPT_g) 1005 if (row >= a->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large: row %d max %d",row,a->m); 1006 #endif 1007 rp = aj + ai[brow]; 1008 ap = aa + bs2*ai[brow]; 1009 rmax = imax[brow]; 1010 nrow = ailen[brow]; 1011 low = 0; 1012 for ( l=0; l<n; l++ ) { /* loop over added columns */ 1013 if (in[l] < 0) continue; 1014 #if defined(PETSC_USE_BOPT_g) 1015 if (in[l] >= a->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large: col %d max %d",in[l],a->n); 1016 #endif 1017 col = in[l]; bcol = col/bs; 1018 ridx = row % bs; cidx = col % bs; 1019 if (roworiented) { 1020 value = v[l + k*n]; 1021 } else { 1022 value = v[k + l*m]; 1023 } 1024 if (!sorted) low = 0; high = nrow; 1025 while (high-low > 7) { 1026 t = (low+high)/2; 1027 if (rp[t] > bcol) high = t; 1028 else low = t; 1029 } 1030 for ( i=low; i<high; i++ ) { 1031 if (rp[i] > bcol) break; 1032 if (rp[i] == bcol) { 1033 bap = ap + bs2*i + bs*cidx + ridx; 1034 if (is == ADD_VALUES) *bap += value; 1035 else *bap = value; 1036 goto noinsert1; 1037 } 1038 } 1039 if (nonew == 1) goto noinsert1; 1040 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); 1041 if (nrow >= rmax) { 1042 /* there is no extra room in row, therefore enlarge */ 1043 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1044 MatScalar *new_a; 1045 1046 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); 1047 1048 /* Malloc new storage space */ 1049 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1050 new_a = (MatScalar *) PetscMalloc( len );CHKPTRQ(new_a); 1051 new_j = (int *) (new_a + bs2*new_nz); 1052 new_i = new_j + new_nz; 1053 1054 /* copy over old data into new slots */ 1055 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} 1056 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1057 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 1058 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1059 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 1060 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1061 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1062 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1063 /* free up old matrix storage */ 1064 ierr = PetscFree(a->a);CHKERRQ(ierr); 1065 if (!a->singlemalloc) { 1066 ierr = PetscFree(a->i);CHKERRQ(ierr); 1067 ierr = PetscFree(a->j);CHKERRQ(ierr); 1068 } 1069 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1070 a->singlemalloc = PETSC_TRUE; 1071 1072 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1073 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1074 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 1075 a->maxnz += bs2*CHUNKSIZE; 1076 a->reallocs++; 1077 a->nz++; 1078 } 1079 N = nrow++ - 1; 1080 /* shift up all the later entries in this row */ 1081 for ( ii=N; ii>=i; ii-- ) { 1082 rp[ii+1] = rp[ii]; 1083 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1084 } 1085 if (N>=i) { 1086 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1087 } 1088 rp[i] = bcol; 1089 ap[bs2*i + bs*cidx + ridx] = value; 1090 noinsert1:; 1091 low = i; 1092 } 1093 ailen[brow] = nrow; 1094 } 1095 PetscFunctionReturn(0); 1096 } 1097 1098 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 1099 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 1100 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int); 1101 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,int,MatReuse,Mat*); 1102 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**); 1103 extern int MatMultTrans_SeqBAIJ(Mat,Vec,Vec); 1104 extern int MatMultTransAdd_SeqBAIJ(Mat,Vec,Vec,Vec); 1105 extern int MatScale_SeqBAIJ(Scalar*,Mat); 1106 extern int MatNorm_SeqBAIJ(Mat,NormType,double *); 1107 extern int MatEqual_SeqBAIJ(Mat,Mat, PetscTruth*); 1108 extern int MatGetDiagonal_SeqBAIJ(Mat,Vec); 1109 extern int MatDiagonalScale_SeqBAIJ(Mat,Vec,Vec); 1110 extern int MatGetInfo_SeqBAIJ(Mat,MatInfoType,MatInfo *); 1111 extern int MatZeroEntries_SeqBAIJ(Mat); 1112 1113 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec); 1114 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec); 1115 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec); 1116 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec); 1117 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec); 1118 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec); 1119 extern int MatSolve_SeqBAIJ_6(Mat,Vec,Vec); 1120 extern int MatSolve_SeqBAIJ_7(Mat,Vec,Vec); 1121 extern int MatSolveTrans_SeqBAIJ_7(Mat,Vec,Vec); 1122 extern int MatSolveTrans_SeqBAIJ_6(Mat,Vec,Vec); 1123 extern int MatSolveTrans_SeqBAIJ_5(Mat,Vec,Vec); 1124 extern int MatSolveTrans_SeqBAIJ_4(Mat,Vec,Vec); 1125 extern int MatSolveTrans_SeqBAIJ_3(Mat,Vec,Vec); 1126 extern int MatSolveTrans_SeqBAIJ_2(Mat,Vec,Vec); 1127 extern int MatSolveTrans_SeqBAIJ_1(Mat,Vec,Vec); 1128 1129 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 1130 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 1131 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 1132 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 1133 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 1134 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 1135 extern int MatLUFactorNumeric_SeqBAIJ_6(Mat,Mat*); 1136 1137 extern int MatMult_SeqBAIJ_1(Mat,Vec,Vec); 1138 extern int MatMult_SeqBAIJ_2(Mat,Vec,Vec); 1139 extern int MatMult_SeqBAIJ_3(Mat,Vec,Vec); 1140 extern int MatMult_SeqBAIJ_4(Mat,Vec,Vec); 1141 extern int MatMult_SeqBAIJ_5(Mat,Vec,Vec); 1142 extern int MatMult_SeqBAIJ_6(Mat,Vec,Vec); 1143 extern int MatMult_SeqBAIJ_7(Mat,Vec,Vec); 1144 extern int MatMult_SeqBAIJ_N(Mat,Vec,Vec); 1145 1146 extern int MatMultAdd_SeqBAIJ_1(Mat,Vec,Vec,Vec); 1147 extern int MatMultAdd_SeqBAIJ_2(Mat,Vec,Vec,Vec); 1148 extern int MatMultAdd_SeqBAIJ_3(Mat,Vec,Vec,Vec); 1149 extern int MatMultAdd_SeqBAIJ_4(Mat,Vec,Vec,Vec); 1150 extern int MatMultAdd_SeqBAIJ_5(Mat,Vec,Vec,Vec); 1151 extern int MatMultAdd_SeqBAIJ_6(Mat,Vec,Vec,Vec); 1152 extern int MatMultAdd_SeqBAIJ_7(Mat,Vec,Vec,Vec); 1153 extern int MatMultAdd_SeqBAIJ_N(Mat,Vec,Vec,Vec); 1154 1155 #undef __FUNC__ 1156 #define __FUNC__ "MatILUFactor_SeqBAIJ" 1157 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info) 1158 { 1159 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1160 Mat outA; 1161 int ierr; 1162 PetscTruth row_identity, col_identity; 1163 1164 PetscFunctionBegin; 1165 if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,0,"Only levels = 0 supported for in-place ILU"); 1166 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1167 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1168 if (!row_identity || !col_identity) { 1169 SETERRQ(1,1,"Row and column permutations must be identity for in-place ILU"); 1170 } 1171 1172 outA = inA; 1173 inA->factor = FACTOR_LU; 1174 1175 if (!a->diag) { 1176 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1177 } 1178 /* 1179 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1180 for ILU(0) factorization with natural ordering 1181 */ 1182 switch (a->bs) { 1183 case 1: 1184 inA->ops->solvetrans = MatSolveTrans_SeqBAIJ_2_NaturalOrdering; 1185 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering solvetrans BS=1\n"); 1186 case 2: 1187 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering; 1188 inA->ops->solve = MatSolve_SeqBAIJ_2_NaturalOrdering; 1189 inA->ops->solvetrans = MatSolveTrans_SeqBAIJ_2_NaturalOrdering; 1190 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 1191 break; 1192 case 3: 1193 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering; 1194 inA->ops->solve = MatSolve_SeqBAIJ_3_NaturalOrdering; 1195 inA->ops->solvetrans = MatSolveTrans_SeqBAIJ_3_NaturalOrdering; 1196 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=3\n"); 1197 break; 1198 case 4: 1199 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering; 1200 inA->ops->solve = MatSolve_SeqBAIJ_4_NaturalOrdering; 1201 inA->ops->solvetrans = MatSolveTrans_SeqBAIJ_4_NaturalOrdering; 1202 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1203 break; 1204 case 5: 1205 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering; 1206 inA->ops->solve = MatSolve_SeqBAIJ_5_NaturalOrdering; 1207 inA->ops->solvetrans = MatSolveTrans_SeqBAIJ_5_NaturalOrdering; 1208 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1209 break; 1210 case 6: 1211 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering; 1212 inA->ops->solve = MatSolve_SeqBAIJ_6_NaturalOrdering; 1213 inA->ops->solvetrans = MatSolveTrans_SeqBAIJ_6_NaturalOrdering; 1214 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1215 break; 1216 case 7: 1217 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering; 1218 inA->ops->solvetrans = MatSolveTrans_SeqBAIJ_7_NaturalOrdering; 1219 inA->ops->solve = MatSolve_SeqBAIJ_7_NaturalOrdering; 1220 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1221 break; 1222 default: 1223 a->row = row; 1224 a->col = col; 1225 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1226 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1227 1228 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1229 ierr = ISInvertPermutation(col,&(a->icol));CHKERRQ(ierr); 1230 PLogObjectParent(inA,a->icol); 1231 1232 if (!a->solve_work) { 1233 a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 1234 PLogObjectMemory(inA,(a->m+a->bs)*sizeof(Scalar)); 1235 } 1236 } 1237 1238 ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr); 1239 1240 PetscFunctionReturn(0); 1241 } 1242 #undef __FUNC__ 1243 #define __FUNC__ "MatPrintHelp_SeqBAIJ" 1244 int MatPrintHelp_SeqBAIJ(Mat A) 1245 { 1246 static PetscTruth called = PETSC_FALSE; 1247 MPI_Comm comm = A->comm; 1248 int ierr; 1249 1250 PetscFunctionBegin; 1251 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1252 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1253 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1254 PetscFunctionReturn(0); 1255 } 1256 1257 EXTERN_C_BEGIN 1258 #undef __FUNC__ 1259 #define __FUNC__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1260 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices) 1261 { 1262 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1263 int i,nz,n; 1264 1265 PetscFunctionBegin; 1266 nz = baij->maxnz; 1267 n = baij->n; 1268 for (i=0; i<nz; i++) { 1269 baij->j[i] = indices[i]; 1270 } 1271 baij->nz = nz; 1272 for ( i=0; i<n; i++ ) { 1273 baij->ilen[i] = baij->imax[i]; 1274 } 1275 1276 PetscFunctionReturn(0); 1277 } 1278 EXTERN_C_END 1279 1280 #undef __FUNC__ 1281 #define __FUNC__ "MatSeqBAIJSetColumnIndices" 1282 /*@ 1283 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1284 in the matrix. 1285 1286 Input Parameters: 1287 + mat - the SeqBAIJ matrix 1288 - indices - the column indices 1289 1290 Level: advanced 1291 1292 Notes: 1293 This can be called if you have precomputed the nonzero structure of the 1294 matrix and want to provide it to the matrix object to improve the performance 1295 of the MatSetValues() operation. 1296 1297 You MUST have set the correct numbers of nonzeros per row in the call to 1298 MatCreateSeqBAIJ(). 1299 1300 MUST be called before any calls to MatSetValues(); 1301 1302 @*/ 1303 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices) 1304 { 1305 int ierr,(*f)(Mat,int *); 1306 1307 PetscFunctionBegin; 1308 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1309 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void **)&f);CHKERRQ(ierr); 1310 if (f) { 1311 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1312 } else { 1313 SETERRQ(1,1,"Wrong type of matrix to set column indices"); 1314 } 1315 PetscFunctionReturn(0); 1316 } 1317 1318 /* -------------------------------------------------------------------*/ 1319 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 1320 MatGetRow_SeqBAIJ, 1321 MatRestoreRow_SeqBAIJ, 1322 MatMult_SeqBAIJ_N, 1323 MatMultAdd_SeqBAIJ_N, 1324 MatMultTrans_SeqBAIJ, 1325 MatMultTransAdd_SeqBAIJ, 1326 MatSolve_SeqBAIJ_N, 1327 0, 1328 0, 1329 0, 1330 MatLUFactor_SeqBAIJ, 1331 0, 1332 0, 1333 MatTranspose_SeqBAIJ, 1334 MatGetInfo_SeqBAIJ, 1335 MatEqual_SeqBAIJ, 1336 MatGetDiagonal_SeqBAIJ, 1337 MatDiagonalScale_SeqBAIJ, 1338 MatNorm_SeqBAIJ, 1339 0, 1340 MatAssemblyEnd_SeqBAIJ, 1341 0, 1342 MatSetOption_SeqBAIJ, 1343 MatZeroEntries_SeqBAIJ, 1344 MatZeroRows_SeqBAIJ, 1345 MatLUFactorSymbolic_SeqBAIJ, 1346 MatLUFactorNumeric_SeqBAIJ_N, 1347 0, 1348 0, 1349 MatGetSize_SeqBAIJ, 1350 MatGetSize_SeqBAIJ, 1351 MatGetOwnershipRange_SeqBAIJ, 1352 MatILUFactorSymbolic_SeqBAIJ, 1353 0, 1354 0, 1355 0, 1356 MatDuplicate_SeqBAIJ, 1357 0, 1358 0, 1359 MatILUFactor_SeqBAIJ, 1360 0, 1361 0, 1362 MatGetSubMatrices_SeqBAIJ, 1363 MatIncreaseOverlap_SeqBAIJ, 1364 MatGetValues_SeqBAIJ, 1365 0, 1366 MatPrintHelp_SeqBAIJ, 1367 MatScale_SeqBAIJ, 1368 0, 1369 0, 1370 0, 1371 MatGetBlockSize_SeqBAIJ, 1372 MatGetRowIJ_SeqBAIJ, 1373 MatRestoreRowIJ_SeqBAIJ, 1374 0, 1375 0, 1376 0, 1377 0, 1378 0, 1379 0, 1380 MatSetValuesBlocked_SeqBAIJ, 1381 MatGetSubMatrix_SeqBAIJ, 1382 0, 1383 0, 1384 MatGetMaps_Petsc}; 1385 1386 EXTERN_C_BEGIN 1387 #undef __FUNC__ 1388 #define __FUNC__ "MatStoreValues_SeqBAIJ" 1389 int MatStoreValues_SeqBAIJ(Mat mat) 1390 { 1391 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1392 int nz = aij->i[aij->m]*aij->bs*aij->bs2; 1393 int ierr; 1394 1395 PetscFunctionBegin; 1396 if (aij->nonew != 1) { 1397 SETERRQ(1,1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1398 } 1399 1400 /* allocate space for values if not already there */ 1401 if (!aij->saved_values) { 1402 aij->saved_values = (Scalar *) PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(aij->saved_values); 1403 } 1404 1405 /* copy values over */ 1406 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(Scalar));CHKERRQ(ierr); 1407 PetscFunctionReturn(0); 1408 } 1409 EXTERN_C_END 1410 1411 EXTERN_C_BEGIN 1412 #undef __FUNC__ 1413 #define __FUNC__ "MatRetrieveValues_SeqBAIJ" 1414 int MatRetrieveValues_SeqBAIJ(Mat mat) 1415 { 1416 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1417 int nz = aij->i[aij->m]*aij->bs*aij->bs2,ierr; 1418 1419 PetscFunctionBegin; 1420 if (aij->nonew != 1) { 1421 SETERRQ(1,1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1422 } 1423 if (!aij->saved_values) { 1424 SETERRQ(1,1,"Must call MatStoreValues(A);first"); 1425 } 1426 1427 /* copy values over */ 1428 ierr = PetscMemcpy(aij->a, aij->saved_values,nz*sizeof(Scalar));CHKERRQ(ierr); 1429 PetscFunctionReturn(0); 1430 } 1431 EXTERN_C_END 1432 1433 #undef __FUNC__ 1434 #define __FUNC__ "MatCreateSeqBAIJ" 1435 /*@C 1436 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 1437 compressed row) format. For good matrix assembly performance the 1438 user should preallocate the matrix storage by setting the parameter nz 1439 (or the array nnz). By setting these parameters accurately, performance 1440 during matrix assembly can be increased by more than a factor of 50. 1441 1442 Collective on MPI_Comm 1443 1444 Input Parameters: 1445 + comm - MPI communicator, set to PETSC_COMM_SELF 1446 . bs - size of block 1447 . m - number of rows 1448 . n - number of columns 1449 . nz - number of block nonzeros per block row (same for all rows) 1450 - nnz - array containing the number of block nonzeros in the various block rows 1451 (possibly different for each block row) or PETSC_NULL 1452 1453 Output Parameter: 1454 . A - the matrix 1455 1456 Options Database Keys: 1457 . -mat_no_unroll - uses code that does not unroll the loops in the 1458 block calculations (much slower) 1459 . -mat_block_size - size of the blocks to use 1460 1461 Level: intermediate 1462 1463 Notes: 1464 The block AIJ format is fully compatible with standard Fortran 77 1465 storage. That is, the stored row and column indices can begin at 1466 either one (as in Fortran) or zero. See the users' manual for details. 1467 1468 Specify the preallocated storage with either nz or nnz (not both). 1469 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1470 allocation. For additional details, see the users manual chapter on 1471 matrices. 1472 1473 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1474 @*/ 1475 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 1476 { 1477 Mat B; 1478 Mat_SeqBAIJ *b; 1479 int i,len,ierr,mbs,nbs,bs2,size; 1480 PetscTruth flg; 1481 1482 PetscFunctionBegin; 1483 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1484 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"Comm must be of size 1"); 1485 1486 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1487 mbs = m/bs; 1488 nbs = n/bs; 1489 bs2 = bs*bs; 1490 1491 if (mbs*bs!=m || nbs*bs!=n) { 1492 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Number rows, cols must be divisible by blocksize"); 1493 } 1494 1495 if (nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"nz cannot be less than -2: value %d",nz); 1496 if (nnz) { 1497 for (i=0; i<mbs; i++) { 1498 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1499 } 1500 } 1501 1502 *A = 0; 1503 PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATSEQBAIJ,"Mat",comm,MatDestroy,MatView); 1504 PLogObjectCreate(B); 1505 B->data = (void *) (b = PetscNew(Mat_SeqBAIJ));CHKPTRQ(b); 1506 ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 1507 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1508 ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1509 if (!flg) { 1510 switch (bs) { 1511 case 1: 1512 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1513 B->ops->solve = MatSolve_SeqBAIJ_1; 1514 B->ops->solvetrans = MatSolveTrans_SeqBAIJ_1; 1515 B->ops->mult = MatMult_SeqBAIJ_1; 1516 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 1517 break; 1518 case 2: 1519 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 1520 B->ops->solve = MatSolve_SeqBAIJ_2; 1521 B->ops->solvetrans = MatSolveTrans_SeqBAIJ_2; 1522 B->ops->mult = MatMult_SeqBAIJ_2; 1523 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 1524 break; 1525 case 3: 1526 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 1527 B->ops->solve = MatSolve_SeqBAIJ_3; 1528 B->ops->solvetrans = MatSolveTrans_SeqBAIJ_3; 1529 B->ops->mult = MatMult_SeqBAIJ_3; 1530 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 1531 break; 1532 case 4: 1533 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 1534 B->ops->solve = MatSolve_SeqBAIJ_4; 1535 B->ops->solvetrans = MatSolveTrans_SeqBAIJ_4; 1536 B->ops->mult = MatMult_SeqBAIJ_4; 1537 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 1538 break; 1539 case 5: 1540 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 1541 B->ops->solve = MatSolve_SeqBAIJ_5; 1542 B->ops->solvetrans = MatSolveTrans_SeqBAIJ_5; 1543 B->ops->mult = MatMult_SeqBAIJ_5; 1544 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 1545 break; 1546 case 6: 1547 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 1548 B->ops->solve = MatSolve_SeqBAIJ_6; 1549 B->ops->solvetrans = MatSolveTrans_SeqBAIJ_6; 1550 B->ops->mult = MatMult_SeqBAIJ_6; 1551 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 1552 break; 1553 case 7: 1554 B->ops->mult = MatMult_SeqBAIJ_7; 1555 B->ops->solve = MatSolve_SeqBAIJ_7; 1556 B->ops->solvetrans = MatSolveTrans_SeqBAIJ_7; 1557 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 1558 break; 1559 } 1560 } 1561 B->ops->destroy = MatDestroy_SeqBAIJ; 1562 B->ops->view = MatView_SeqBAIJ; 1563 B->factor = 0; 1564 B->lupivotthreshold = 1.0; 1565 B->mapping = 0; 1566 b->row = 0; 1567 b->col = 0; 1568 b->icol = 0; 1569 b->reallocs = 0; 1570 b->saved_values = 0; 1571 1572 b->m = m; B->m = m; B->M = m; 1573 b->n = n; B->n = n; B->N = n; 1574 1575 ierr = MapCreateMPI(comm,m,m,&B->rmap);CHKERRQ(ierr); 1576 ierr = MapCreateMPI(comm,n,n,&B->cmap);CHKERRQ(ierr); 1577 1578 b->mbs = mbs; 1579 b->nbs = nbs; 1580 b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) );CHKPTRQ(b->imax); 1581 if (!nnz) { 1582 if (nz == PETSC_DEFAULT) nz = 5; 1583 else if (nz <= 0) nz = 1; 1584 for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 1585 nz = nz*mbs; 1586 } else { 1587 nz = 0; 1588 for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 1589 } 1590 1591 /* allocate the matrix space */ 1592 len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (b->m+1)*sizeof(int); 1593 b->a = (MatScalar *) PetscMalloc( len );CHKPTRQ(b->a); 1594 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1595 b->j = (int *) (b->a + nz*bs2); 1596 ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 1597 b->i = b->j + nz; 1598 b->singlemalloc = PETSC_TRUE; 1599 1600 b->i[0] = 0; 1601 for (i=1; i<mbs+1; i++) { 1602 b->i[i] = b->i[i-1] + b->imax[i-1]; 1603 } 1604 1605 /* b->ilen will count nonzeros in each block row so far. */ 1606 b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 1607 PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1608 for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 1609 1610 b->bs = bs; 1611 b->bs2 = bs2; 1612 b->mbs = mbs; 1613 b->nz = 0; 1614 b->maxnz = nz*bs2; 1615 b->sorted = PETSC_FALSE; 1616 b->roworiented = PETSC_TRUE; 1617 b->nonew = 0; 1618 b->diag = 0; 1619 b->solve_work = 0; 1620 b->mult_work = 0; 1621 b->spptr = 0; 1622 B->info.nz_unneeded = (double)b->maxnz; 1623 1624 *A = B; 1625 ierr = OptionsHasName(PETSC_NULL,"-help", &flg);CHKERRQ(ierr); 1626 if (flg) {ierr = MatPrintHelp(B);CHKERRQ(ierr); } 1627 1628 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1629 "MatStoreValues_SeqBAIJ", 1630 (void*)MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 1631 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1632 "MatRetrieveValues_SeqBAIJ", 1633 (void*)MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 1634 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 1635 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 1636 (void*)MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 1637 PetscFunctionReturn(0); 1638 } 1639 1640 #undef __FUNC__ 1641 #define __FUNC__ "MatDuplicate_SeqBAIJ" 1642 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1643 { 1644 Mat C; 1645 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 1646 int i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr; 1647 1648 PetscFunctionBegin; 1649 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,0,"Corrupt matrix"); 1650 1651 *B = 0; 1652 PetscHeaderCreate(C,_p_Mat,struct _MatOps,MAT_COOKIE,MATSEQBAIJ,"Mat",A->comm,MatDestroy,MatView); 1653 PLogObjectCreate(C); 1654 C->data = (void *) (c = PetscNew(Mat_SeqBAIJ));CHKPTRQ(c); 1655 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1656 C->ops->destroy = MatDestroy_SeqBAIJ; 1657 C->ops->view = MatView_SeqBAIJ; 1658 C->factor = A->factor; 1659 c->row = 0; 1660 c->col = 0; 1661 c->icol = 0; 1662 c->saved_values = 0; 1663 C->assembled = PETSC_TRUE; 1664 1665 c->m = C->m = a->m; 1666 c->n = C->n = a->n; 1667 C->M = a->m; 1668 C->N = a->n; 1669 1670 c->bs = a->bs; 1671 c->bs2 = a->bs2; 1672 c->mbs = a->mbs; 1673 c->nbs = a->nbs; 1674 1675 c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->imax); 1676 c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->ilen); 1677 for ( i=0; i<mbs; i++ ) { 1678 c->imax[i] = a->imax[i]; 1679 c->ilen[i] = a->ilen[i]; 1680 } 1681 1682 /* allocate the matrix space */ 1683 c->singlemalloc = PETSC_TRUE; 1684 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1685 c->a = (MatScalar *) PetscMalloc( len );CHKPTRQ(c->a); 1686 c->j = (int *) (c->a + nz*bs2); 1687 c->i = c->j + nz; 1688 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1689 if (mbs > 0) { 1690 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1691 if (cpvalues == MAT_COPY_VALUES) { 1692 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1693 } else { 1694 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1695 } 1696 } 1697 1698 PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1699 c->sorted = a->sorted; 1700 c->roworiented = a->roworiented; 1701 c->nonew = a->nonew; 1702 1703 if (a->diag) { 1704 c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) );CHKPTRQ(c->diag); 1705 PLogObjectMemory(C,(mbs+1)*sizeof(int)); 1706 for ( i=0; i<mbs; i++ ) { 1707 c->diag[i] = a->diag[i]; 1708 } 1709 } else c->diag = 0; 1710 c->nz = a->nz; 1711 c->maxnz = a->maxnz; 1712 c->solve_work = 0; 1713 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1714 c->mult_work = 0; 1715 *B = C; 1716 ierr = FListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1717 PetscFunctionReturn(0); 1718 } 1719 1720 #undef __FUNC__ 1721 #define __FUNC__ "MatLoad_SeqBAIJ" 1722 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 1723 { 1724 Mat_SeqBAIJ *a; 1725 Mat B; 1726 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1727 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1728 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1729 int *masked, nmask,tmp,bs2,ishift; 1730 Scalar *aa; 1731 MPI_Comm comm = ((PetscObject) viewer)->comm; 1732 1733 PetscFunctionBegin; 1734 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1735 bs2 = bs*bs; 1736 1737 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1738 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"view must have one processor"); 1739 ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1740 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1741 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not Mat object"); 1742 M = header[1]; N = header[2]; nz = header[3]; 1743 1744 if (header[3] < 0) { 1745 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as SeqBAIJ"); 1746 } 1747 1748 if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices"); 1749 1750 /* 1751 This code adds extra rows to make sure the number of rows is 1752 divisible by the blocksize 1753 */ 1754 mbs = M/bs; 1755 extra_rows = bs - M + bs*(mbs); 1756 if (extra_rows == bs) extra_rows = 0; 1757 else mbs++; 1758 if (extra_rows) { 1759 PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 1760 } 1761 1762 /* read in row lengths */ 1763 rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 1764 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1765 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 1766 1767 /* read in column indices */ 1768 jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) );CHKPTRQ(jj); 1769 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1770 for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 1771 1772 /* loop over row lengths determining block row lengths */ 1773 browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 1774 ierr = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1775 mask = (int *) PetscMalloc( 2*mbs*sizeof(int) );CHKPTRQ(mask); 1776 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1777 masked = mask + mbs; 1778 rowcount = 0; nzcount = 0; 1779 for ( i=0; i<mbs; i++ ) { 1780 nmask = 0; 1781 for ( j=0; j<bs; j++ ) { 1782 kmax = rowlengths[rowcount]; 1783 for ( k=0; k<kmax; k++ ) { 1784 tmp = jj[nzcount++]/bs; 1785 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1786 } 1787 rowcount++; 1788 } 1789 browlengths[i] += nmask; 1790 /* zero out the mask elements we set */ 1791 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 1792 } 1793 1794 /* create our matrix */ 1795 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr); 1796 B = *A; 1797 a = (Mat_SeqBAIJ *) B->data; 1798 1799 /* set matrix "i" values */ 1800 a->i[0] = 0; 1801 for ( i=1; i<= mbs; i++ ) { 1802 a->i[i] = a->i[i-1] + browlengths[i-1]; 1803 a->ilen[i-1] = browlengths[i-1]; 1804 } 1805 a->nz = 0; 1806 for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 1807 1808 /* read in nonzero values */ 1809 aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 1810 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1811 for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 1812 1813 /* set "a" and "j" values into matrix */ 1814 nzcount = 0; jcount = 0; 1815 for ( i=0; i<mbs; i++ ) { 1816 nzcountb = nzcount; 1817 nmask = 0; 1818 for ( j=0; j<bs; j++ ) { 1819 kmax = rowlengths[i*bs+j]; 1820 for ( k=0; k<kmax; k++ ) { 1821 tmp = jj[nzcount++]/bs; 1822 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1823 } 1824 rowcount++; 1825 } 1826 /* sort the masked values */ 1827 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1828 1829 /* set "j" values into matrix */ 1830 maskcount = 1; 1831 for ( j=0; j<nmask; j++ ) { 1832 a->j[jcount++] = masked[j]; 1833 mask[masked[j]] = maskcount++; 1834 } 1835 /* set "a" values into matrix */ 1836 ishift = bs2*a->i[i]; 1837 for ( j=0; j<bs; j++ ) { 1838 kmax = rowlengths[i*bs+j]; 1839 for ( k=0; k<kmax; k++ ) { 1840 tmp = jj[nzcountb]/bs ; 1841 block = mask[tmp] - 1; 1842 point = jj[nzcountb] - bs*tmp; 1843 idx = ishift + bs2*block + j + bs*point; 1844 a->a[idx] = aa[nzcountb++]; 1845 } 1846 } 1847 /* zero out the mask elements we set */ 1848 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 1849 } 1850 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"Bad binary matrix"); 1851 1852 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1853 ierr = PetscFree(browlengths);CHKERRQ(ierr); 1854 ierr = PetscFree(aa);CHKERRQ(ierr); 1855 ierr = PetscFree(jj);CHKERRQ(ierr); 1856 ierr = PetscFree(mask);CHKERRQ(ierr); 1857 1858 B->assembled = PETSC_TRUE; 1859 1860 ierr = MatView_Private(B);CHKERRQ(ierr); 1861 PetscFunctionReturn(0); 1862 } 1863 1864 1865 1866