------ ASN.1 file format -----------
ASN.1 -- see NCBI toolkit docs, source and examples (ncbi.nlm.nih.gov)
Example ASN.1 sequence file ----
-- Example value: a Bioseq-set containing two raw, linear DNA sequences.
Bioseq-set ::= {
  seq-set {
    seq {
      id { local id 1 } ,                                    -- required
      descr { title "Dummy sequence data from nowhere" } ,   -- may be left out
      inst {                                                 -- required
        repr raw ,
        mol dna ,
        length 156 ,              -- equals the residue count of seq-data
        topology linear ,
        seq-data
          iupacna "GAATTCATTTTTGAAACAAATCGACCTGACGACGGAATGGTACTCGAATTA
TGGGCCAAAGGGTTTTATGGGACAAATTAATAGGTGTTCATTATATGCCACTTTCGGAGATTAGATACAGCAATGCAG
TGGATTCAAAGCAATAGAGTTGTTCTT"
      }
    } ,
    seq {
      id { local id 2 } ,
      descr { title "Dummy sequence 2 data from somewhere else" } ,
      inst {
        repr raw ,
        mol dna ,
        length 150 ,              -- equals the residue count of seq-data
        topology linear ,
        seq-data
          iupacna "TTTTTTTTTTTTGAAACAAATCGACCTGACGACGGAATGGTACTCGAATTA
TGGGCCAAAGGGTTTTATGGGACAAATTAATAGGTGTTCATTATATGCCACTTTCGGAGATTAGATACAGCAATGCAG
TGGATTCAAAGCAATAGAGTT"
      }
    }
  }
}
------------------------------------------------
Partial ASN.1 description from the NCBI toolkit:
-- Bioseq-set: simply a collection of sequence entries.
Bioseq-set ::= SEQUENCE {
  id      Object-id OPTIONAL ,
  coll    Dbtag OPTIONAL ,            -- identifies a collection
  level   INTEGER OPTIONAL ,          -- nesting level
  class   ENUMERATED {
    not-set       (0) ,
    nuc-prot      (1) ,               -- nucleic acid and the proteins it codes for
    segset        (2) ,               -- segmented sequence + parts
    conset        (3) ,               -- constructed sequence + parts
    parts         (4) ,               -- parts for segset (2) or conset (3)
    gibb          (5) ,               -- geninfo backbone
    gi            (6) ,               -- geninfo
    genbank       (7) ,               -- converted genbank
    pir           (8) ,               -- converted pir
    pub-set       (9) ,               -- all the seqs from a single publication
    equiv        (10) ,               -- a set of equivalent maps or seqs
    swissprot    (11) ,               -- converted SWISSPROT
    pdb-entry    (12) ,               -- a complete PDB entry
    mut-set      (13) ,               -- set of mutations
    pop-set      (14) ,               -- population study
    phy-set      (15) ,               -- phylogenetic study
    eco-set      (16) ,               -- ecological sample study
    gen-prod-set (17) ,               -- genomic products, chrom+mRNA+protein
    other       (255)
  } DEFAULT not-set ,
  release VisibleString OPTIONAL ,
  date    Date OPTIONAL ,
  descr   Seq-descr OPTIONAL ,
  seq-set SEQUENCE OF Seq-entry ,     -- the only member without OPTIONAL/DEFAULT
  annot   SET OF Seq-annot OPTIONAL
}
-- Seq-entry: either a single Bioseq or a (nested) Bioseq-set.
Seq-entry ::= CHOICE {
  seq Bioseq ,
  set Bioseq-set
}
-- Bioseq: one sequence with its identifiers, descriptors and instance data.
Bioseq ::= SEQUENCE {
  id    SET OF Seq-id ,               -- identifiers, all equivalent
  descr Seq-descr OPTIONAL ,          -- descriptors
  inst  Seq-inst ,                    -- the sequence data
  annot SET OF Seq-annot OPTIONAL
}
-- Seq-inst: the sequence data itself. This is the type referenced by the
-- inst member of Bioseq. The type name is Seq-inst (the earlier spelling
-- with a doubled leading S was a typo that left the Bioseq reference
-- unresolved).
Seq-inst ::= SEQUENCE {
  repr ENUMERATED {                   -- representation class
    not-set   (0) ,                   -- empty
    virtual   (1) ,                   -- no seq data
    raw       (2) ,                   -- continuous sequence
    seg       (3) ,                   -- segmented sequence
    const     (4) ,                   -- constructed sequence
    ref       (5) ,                   -- reference to another sequence
    consen    (6) ,                   -- consensus sequence or pattern
    map       (7) ,                   -- ordered map of any kind
    delta     (8) ,                   -- sequence made by changes (delta) to others
    other   (255)
  } ,
  mol ENUMERATED {                    -- molecule class in living organism,
    not-set   (0) ,                   --   hence cdna = rna
    dna       (1) ,
    rna       (2) ,
    aa        (3) ,
    na        (4) ,                   -- just a nucleic acid
    other   (255)
  } ,
  length INTEGER OPTIONAL ,           -- length of sequence in residues
  fuzz Int-fuzz OPTIONAL ,            -- length uncertainty
  topology ENUMERATED {               -- topology of molecule
    not-set   (0) ,
    linear    (1) ,
    circular  (2) ,
    tandem    (3) ,                   -- some part of tandem repeat
    other   (255)
  } DEFAULT linear ,
  strand ENUMERATED {                 -- strandedness in living organism
    not-set   (0) ,
    ss        (1) ,                   -- single strand
    ds        (2) ,                   -- double strand
    mixed     (3) ,
    other   (255)
  } OPTIONAL ,                        -- default ds for DNA, ss for RNA, pept
  seq-data Seq-data OPTIONAL ,        -- the sequence
  ext Seq-ext OPTIONAL ,              -- extensions for special types
  hist Seq-hist OPTIONAL              -- sequence history
}
---------------------------------------------------