read.pdb(file, maxlines = -1, multi = FALSE, rm.insert = FALSE, rm.alt = TRUE, ATOM.only = FALSE, hex = FALSE, verbose = TRUE)read.pdb2(file, maxlines = -1, multi = FALSE, rm.insert = FALSE, rm.alt = TRUE, ATOM.only = FALSE, verbose = TRUE)"print"(x, printseq=TRUE, ...)"summary"(object, printseq=FALSE, ...)
read.pdb
. read.pdb
. pdbseq
. Read a Protein Data Bank (PDB) coordinate file.
read.pdb
is a re-implementation (using Rcpp) of the slower but
more tested R implementation of the same function (called
read.pdb2
since bio3d-v2.3).
maxlines
may be set so as to restrict the reading to a portion
of input files. Note that the preferred means of reading large
multi-model files is via binary DCD or NetCDF format trajectory files
(see the read.dcd
and read.ncdf
functions).
"pdb"
with the following components:
"xyz"
containing the
ATOM and HETATM coordinate data. nrow(atom)
with TRUE values indicating a C-alpha elety. "pdb"
. It can be used for building biological units
(See biounit
). Grant, B.J. et al. (2006) Bioinformatics 22, 2695--2696.
For a description of PDB format (version3.3) see: http://www.wwpdb.org/documentation/format33/v3.3.html.
For both atom
and het
list components the column names can be
used as a convenient means of data access, namely:
Atom serial number eleno ,
Atom type elety,
Alternate location indicator alt,
Residue name resid,
Chain identifier chain,
Residue sequence number resno,
Code for insertion of residues insert,
Orthogonal coordinates x,
Orthogonal coordinates y,
Orthogonal coordinates z,
Occupancy o, and
Temperature factor b.
See examples for further details.
## Read a PDB file from the RCSB online database #pdb <- read.pdb("4q21") ## Read a PDB file from those included with the package pdb <- read.pdb( system.file("examples/1hel.pdb", package="bio3d") ) ## Print a brief composition summary pdbCall: read.pdb(file = system.file("examples/1hel.pdb", package = "bio3d")) Total Models#: 1 Total Atoms#: 1001, XYZs#: 3003 Chains#: 1 (values: A) Protein Atoms#: 1001 (residues/Calpha atoms#: 129) Nucleic acid Atoms#: 0 (residues/phosphate atoms#: 0) Non-protein/nucleic Atoms#: 0 (residues: 0) Non-protein/nucleic resid values: [ none ] Protein sequence: KVFGRCELAAAMKRHGLDNYRGYSLGNWVCAAKFESNFNTQATNRNTDGSTDYGILQINS RWWCNDGRTPGSRNLCNIPCSALLSSDITASVNCAKKIVSDGNGMNAWVAWRNRCKGTDV QAWIRGCRL + attr: atom, xyz, seqres, helix, sheet, calpha, remark, call## Examine the storage format (or internal *str*ucture) str(pdb)List of 8 $ atom :'data.frame': 1001 obs. of 16 variables: ..$ type : chr [1:1001] "ATOM" "ATOM" "ATOM" "ATOM" ... ..$ eleno : int [1:1001] 1 2 3 4 5 6 7 8 9 10 ... ..$ elety : chr [1:1001] "N" "CA" "C" "O" ... ..$ alt : chr [1:1001] NA NA NA NA ... ..$ resid : chr [1:1001] "LYS" "LYS" "LYS" "LYS" ... ..$ chain : chr [1:1001] "A" "A" "A" "A" ... ..$ resno : int [1:1001] 1 1 1 1 1 1 1 1 1 2 ... ..$ insert: chr [1:1001] NA NA NA NA ... ..$ x : num [1:1001] 3.294 2.388 2.438 2.406 0.949 ... ..$ y : num [1:1001] 10.2 10.5 12 12.9 10.1 ... ..$ z : num [1:1001] 10.27 9.17 8.89 9.81 9.56 ... ..$ o : num [1:1001] 1 1 1 1 1 1 1 1 1 1 ... ..$ b : num [1:1001] 11.18 9.68 14 14 13.29 ... ..$ segid : chr [1:1001] NA NA NA NA ... ..$ elesy : chr [1:1001] "N" "C" "C" "O" ... ..$ charge: chr [1:1001] NA NA NA NA ... $ xyz : xyz [1, 1:3003] 3.29 10.16 10.27 2.39 10.53 ... ..- attr(*, "class")= chr [1:2] "xyz" "matrix" $ seqres: Named chr [1:129] "LYS" "VAL" "PHE" "GLY" ... ..- attr(*, "names")= chr [1:129] "A" "A" "A" "A" ... $ helix :List of 4 ..$ start: Named num [1:7] 4 25 79 88 104 109 120 .. ..- attr(*, "names")= chr [1:7] "" "" "" "" ... ..$ end : Named num [1:7] 16 34 84 100 108 114 124 .. ..- attr(*, "names")= chr [1:7] "" "" "" "" ... ..$ chain: chr [1:7] "A" "A" "A" "A" ... ..$ type : chr [1:7] "1" "1" "5" "1" ... $ sheet :List of 4 ..$ start: Named num [1:3] 42 49 56 .. ..- attr(*, "names")= chr [1:3] "" "" "" ..$ end : Named num [1:3] 46 54 60 .. ..- attr(*, "names")= chr [1:3] "" "" "" ..$ chain: chr [1:3] "A" "A" "A" ..$ sense: chr [1:3] "0" "-1" "-1" $ calpha: logi [1:1001] FALSE TRUE FALSE FALSE FALSE FALSE ... $ remark:List of 1 ..$ biomat:List of 4 .. ..$ num : int 1 .. ..$ chain :List of 1 .. .. ..$ : chr "A" .. ..$ mat :List of 1 .. .. ..$ :List of 1 .. .. .. ..$ A: num [1:3, 1:4] 1 0 0 0 1 0 0 0 1 0 ... .. ..$ method: chr "AUTHOR" $ call : language read.pdb(file = system.file("examples/1hel.pdb", package = "bio3d")) - attr(*, "class")= chr [1:2] "pdb" "sse"## Print data for the first four atom pdb$atom[1:4,]type eleno elety alt resid chain resno insert x y z o b 1 ATOM 1 N <NA> LYS A 1 <NA> 3.294 10.164 10.266 1 11.18 2 ATOM 2 CA <NA> LYS A 1 <NA> 2.388 10.533 9.168 1 9.68 3 ATOM 3 C <NA> LYS A 1 <NA> 2.438 12.049 8.889 1 14.00 4 ATOM 4 O <NA> LYS A 1 <NA> 2.406 12.898 9.815 1 14.00 segid elesy charge 1 <NA> N <NA> 2 <NA> C <NA> 3 <NA> C <NA> 4 <NA> O <NA>## Print some coordinate data head(pdb$atom[, c("x","y","z")])x y z 1 3.294 10.164 10.266 2 2.388 10.533 9.168 3 2.438 12.049 8.889 4 2.406 12.898 9.815 5 0.949 10.101 9.559 6 -0.050 10.621 8.573## Or coordinates as a numeric vector #head(pdb$xyz) ## Print C-alpha coordinates (can also use 'atom.select' function) head(pdb$atom[pdb$calpha, c("resid","elety","x","y","z")])resid elety x y z 2 LYS CA 2.388 10.533 9.168 11 VAL CA 2.524 13.840 7.282 18 PHE CA -1.021 15.173 7.169 29 GLY CA -2.645 17.273 4.511 33 ARG CA -4.106 20.560 5.674 44 CYS CA -7.767 19.572 5.189inds <- atom.select(pdb, elety="CA") head( pdb$atom[inds$atom, ] )type eleno elety alt resid chain resno insert x y z o b 2 ATOM 2 CA <NA> LYS A 1 <NA> 2.388 10.533 9.168 1 9.68 11 ATOM 11 CA <NA> VAL A 2 <NA> 2.524 13.840 7.282 1 10.02 18 ATOM 18 CA <NA> PHE A 3 <NA> -1.021 15.173 7.169 1 15.34 29 ATOM 29 CA <NA> GLY A 4 <NA> -2.645 17.273 4.511 1 13.97 33 ATOM 33 CA <NA> ARG A 5 <NA> -4.106 20.560 5.674 1 11.63 44 ATOM 44 CA <NA> CYS A 6 <NA> -7.767 19.572 5.189 1 12.93 segid elesy charge 2 <NA> C <NA> 11 <NA> C <NA> 18 <NA> C <NA> 29 <NA> C <NA> 33 <NA> C <NA> 44 <NA> C <NA>## The atom.select() function returns 'indices' (row numbers) ## that can be used for accessing subsets of PDB objects, e.g. inds <- atom.select(pdb,"ligand") pdb$atom[inds$atom,][1] type eleno elety alt resid chain resno insert x y [11] z o b segid elesy charge <0 rows> (or 0-length row.names)pdb$xyz[inds$xyz]numeric(0)## See the help page for atom.select() function for more details. ## Print SSE data for helix and sheet, ## see also dssp() and stride() functions print.sse(pdb)Call: read.pdb(file = system.file("examples/1hel.pdb", package = "bio3d")) Class: pdbsse Helices: 7 4-16 (A) 25-34 (A) 79-84 (A) 88-100 (A) 104-108 (A) 109-114 (A) 120-124 (A) Sheets: 3 42-46 (A) 49-54 (A) 56-60 (A) Turns: 0 Output is provided in residue numberspdb$helix$start 4 25 79 88 104 109 120 $end 16 34 84 100 108 114 124 $chain [1] "A" "A" "A" "A" "A" "A" "A" $type [1] "1" "1" "5" "1" "5" "1" "5"pdb$sheet$start42 49 56## Print SEQRES data pdb$seqresA A A A A A A A A A A A A "LYS" "VAL" "PHE" "GLY" "ARG" "CYS" "GLU" "LEU" "ALA" "ALA" "ALA" "MET" "LYS" A A A A A A A A A A A A A "ARG" "HIS" "GLY" "LEU" "ASP" "ASN" "TYR" "ARG" "GLY" "TYR" "SER" "LEU" "GLY" A A A A A A A A A A A A A "ASN" "TRP" "VAL" "CYS" "ALA" "ALA" "LYS" "PHE" "GLU" "SER" "ASN" "PHE" "ASN" A A A A A A A A A A A A A "THR" "GLN" "ALA" "THR" "ASN" "ARG" "ASN" "THR" "ASP" "GLY" "SER" "THR" "ASP" A A A A A A A A A A A A A "TYR" "GLY" "ILE" "LEU" "GLN" "ILE" "ASN" "SER" "ARG" "TRP" "TRP" "CYS" "ASN" A A A A A A A A A A A A A "ASP" "GLY" "ARG" "THR" "PRO" "GLY" "SER" "ARG" "ASN" "LEU" "CYS" "ASN" "ILE" A A A A A A A A A A A A A "PRO" "CYS" "SER" "ALA" "LEU" "LEU" "SER" "SER" "ASP" "ILE" "THR" "ALA" "SER" A A A A A A A A A A A A A "VAL" "ASN" "CYS" "ALA" "LYS" "LYS" "ILE" "VAL" "SER" "ASP" "GLY" "ASN" "GLY" A A A A A A A A A A A A A "MET" "ASN" "ALA" "TRP" "VAL" "ALA" "TRP" "ARG" "ASN" "ARG" "CYS" "LYS" "GLY" A A A A A A A A A A A A "THR" "ASP" "VAL" "GLN" "ALA" "TRP" "ILE" "ARG" "GLY" "CYS" "ARG" "LEU"## SEQRES as one letter code aa321(pdb$seqres)[1] "K" "V" "F" "G" "R" "C" "E" "L" "A" "A" "A" "M" "K" "R" "H" "G" "L" "D" [19] "N" "Y" "R" "G" "Y" "S" "L" "G" "N" "W" "V" "C" "A" "A" "K" "F" "E" "S" [37] "N" "F" "N" "T" "Q" "A" "T" "N" "R" "N" "T" "D" "G" "S" "T" "D" "Y" "G" [55] "I" "L" "Q" "I" "N" "S" "R" "W" "W" "C" "N" "D" "G" "R" "T" "P" "G" "S" [73] "R" "N" "L" "C" "N" "I" "P" "C" "S" "A" "L" "L" "S" "S" "D" "I" "T" "A" [91] "S" "V" "N" "C" "A" "K" "K" "I" "V" "S" "D" "G" "N" "G" "M" "N" "A" "W" [109] "V" "A" "W" "R" "N" "R" "C" "K" "G" "T" "D" "V" "Q" "A" "W" "I" "R" "G" [127] "C" "R" "L"## Where is the P-loop motif in the ATOM sequence inds.seq <- motif.find("G....GKT", pdbseq(pdb)) pdbseq(pdb)[inds.seq]4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 "G" "R" "C" "E" "L" "A" "A" "A" "M" "K" "R" "H" "G" "L" "D" "N" "Y" "R" "G" "Y" 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 "S" "L" "G" "N" "W" "V" "C" "A" "A" "K" "F" "E" "S" "N" "F" "N" "T" "Q" "A" "T" 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 "N" "R" "N" "T" "D" "G" "S" "T" "D" "Y" "G" "I" "L" "Q" "I" "N" "S" "R" "W" "W" 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 "C" "N" "D" "G" "R" "T" "P" "G" "S" "R" "N" "L" "C" "N" "I" "P" "C" "S" "A" "L" 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 "L" "S" "S" "D" "I" "T" "A" "S" "V" "N" "C" "A" "K" "K" "I" "V" "S" "D" "G" "N" 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 "G" "M" "N" "A" "W" "V" "A" "W" "R" "N" "R" "C" "K" "G" "T" "D" "V" "Q" "A" "W" 124 125 126 127 128 129 "I" "R" "G" "C" "R" "L"## Where is it in the structure inds.pdb <- atom.select(pdb,resno=inds.seq, elety="CA") pdb$atom[inds.pdb$atom,][1] type eleno elety alt resid chain resno insert x y [11] z o b segid elesy charge <0 rows> (or 0-length row.names)pdb$xyz[inds.pdb$xyz]numeric(0)## View in interactive 3D mode #view(pdb)
atom.select
, write.pdb
,
trim.pdb
, cat.pdb
,
read.prmtop
, as.pdb
,
read.dcd
, read.ncdf
,
read.fasta.pdb
, read.fasta
,
biounit