# unknowns/wildcards
x NC([*])C(=O)O # unknown NRP
pk C([*])C(-O) # unknown polyketide

# standard aminos
ala NC(C)C(=O)O # alanine
arg NC(CCCNC(N)=N)C(=O)O # arginine
asn NC(CC(=O)N)C(=O)O # asparagine
asp NC(CC(=O)O)C(=O)O # aspartic acid
cys NC(CS)C(=O)O # cysteine
gln NC(CCC(=O)N)C(=O)O # glutamine
glu NC(CCC(=O)O)C(=O)O # glutamic acid
gly NCC(=O)O # glycine
his NC(CC1=CNC=N1)C(=O)O # histidine
ile NC(C(C)CC)C(=O)O # isoleucine
leu NC(CC(C)C)C(=O)O # leucine
lys NC(CCCCN)C(=O)O # lysine
met NC(CCSC)C(=O)O # methionine
phe NC(Cc1ccccc1)C(=O)O # phenylalanine
pro N1C(CCC1)C(=O)O # proline
ser NC(CO)C(=O)O # serine
thr NC(C(O)C)C(=O)O # threonine
trp NC(CC1=CNc2c1cccc2)C(=O)O # tryptophan
tyr NC(Cc1ccc(O)cc1)C(=O)O # tyrosine
val NC(C(C)C)C(=O)O # valine

# NORINE amino acids
3Me-Glu NC(C(C)CC(=O)O)C(=O)O # 3-methyl-glutamate
bAla NCCC(=O)O # beta-alanine
aIle NC(C(C)CC)C(=O)O # allo-isoleucine (stereochemistry not encoded, so the SMILES is the same as ile)
aThr CC(O)C(N)C(=O)O # allo-threonine
diOH-Bz Oc1c(O)cccc1C(=O)O # 2,3-dihydroxy-benzoic acid
bLys NCCCC(N)CC(=O)O # beta-lysine
bOH-Tyr NC(C(O)c1ccc(O)cc1)C(=O)O # beta-hydroxy-tyrosine
Cl2-Hpg NC(c1cc(Cl)c(O)c(Cl)c1)C(=O)O # 3,5-dichloro-4-hydroxy-phenylglycine
D-Ala NC(C)C(=O)O # D-alanine
D-Hiv OC(C(C)C)C(=O)O # D-2-hydroxyisovalerate
D-Hmp CCC(C)C(O)C(=O)O # 2-hydroxy-3-methyl-pentanoic acid (D-hmp)
dhpg NC(c1cc(O)cc(O)c1)C(=O)O # 3,5-dihydroxy-phenylglycine
Hpr N1C(CCCC1)C(=O)O # pipecolic acid
IVal NC(CC)(C)C(=O)O # isovaline
OH-Orn NC(CCCNO)C(=O)O # N5-hydroxyornithine
Fo-OH-Orn NC(CCCN(O)C=O)C(=O)O # N5-formyl-N5-hydroxyornithine
Ac-OH-Orn NC(CCCN(O)C(=O)C)C(=O)O # L-δ-N-acetyl-δ-N-hydroxyornithine/L-Nδ-hydroxy-Nδ-acylornithine
C10:0-NH2(2)-Ep(9)-oxo(8) NC(CCCCCC(=O)C1OC1)C(=O)O # 2-amino-8-oxo-9,10-epoxydecanoate
Valol NC(C(C)C)CO # valinol
Pgl NC(c1ccccc1)C(=O)O # phenylglycine


# other NRPSPredictor predictions
pPro N1CC(CCC)CC1C(=O)O # 4-propyl-proline
aad NC(CCCC(=O)O)C(=O)O # 2-amino-adipic acid
abu NC(C(C))C(=O)O # 2-amino-butyric acid
bmt NC(C(O)C(C)CC=CC)C(=O)O # 4-butenyl-4-methyl threonine
cap NC(C1CCN=C(N1)N)C(=O)O # capreomycidine
dab NC(CCN)C(=O)O # 2,4-diaminobutyric acid
dht NC(C(=O)C)C(=O)O # dehydro-threonine/2,3-dehydroaminobutyric acid
hiv OC(C(C)C)C(=O)O # 2-hydroxyisovalerate
hpg NC(c1ccc(O)cc1)C(=O)O # 4-hydroxy-phenylglycine
hyv NC(C(CO)C)C(=O)O # 4-hydroxy-L-valine
hyv-d OC(C(C)C)C(=O)O # 2-hydroxy-isovaleric acid (2-hydroxy-3-methylbutanoic acid; same SMILES as hiv)
orn NC(CCCN)C(=O)O # ornithine
sal Oc1ccccc1C(=O)O # salicylic acid (not in norine yet)
tcl NC(CC(C)C(Cl)(Cl)(Cl))C(=O)O # (4S)-5,5,5-trichloro-leucine
LDAP NC(CCCC(N)C(=O)O)C(=O)O # diaminopimelic acid
meval NC(C(C)(C)C)C(=O)O # Me-Val
alaninol NC(C)CO
N-(1,1-dimethyl-1-allyl)Trp NC(CC1=CN(C(C)(C)C=C)c2c1cccc2)C(=O)O
d-lyserg CN1CC(C=C2C1CC3=CNC4=CC=CC2=C34)C(=O)O # D-lysergic acid
ser-thr NC(C([*])O)C(=O)O # Serine or Threonine
mephe NC(C(C)c1ccccc1)C(=O)O # Cmethyl-phenylalanine?
hasn NC(C(O)C(=O)N)C(=O)O # hydroxyasparagine
s-nmethoxy-trp NC(CC1=CN(OC)c2c1cccc2)C(=O)O
alpha-hydroxy-isocaproic-acid OC(CC(C)C)C(=O)O # 2S-Hic (2-hydroxy-4-methylpentanoic acid)
MeHOval O=C(C(C)CC)C(=O)O # 3-Methyl-2-oxovaleric acid
2-oxo-isovaleric-acid O=C(C(C)C)C(=O)O
aoda NC(CCCCCC(=O)CC)C(=O)O # S-2-amino-8-oxodecanoic acid

# special symbols for PKS chain predictions
pks-start1 C
pks-end1 C(C)C(=O)O
pks-end2 C(=O)O

# malonyl-CoA variants from PKS predictions
# base
mal CC(=O) # malonyl-CoA
ohmal CC(O)
ccmal C=C # double-bonded malonyl-CoA
redmal CC # reduced malonyl-CoA

# methylated variants
me-mal C(C)C(=O)
me-ohmal C(C)C(O)
me-ccmal C(C)=C
me-redmal C(C)C

# methoxy variants
mxmal C(OC)C(=O)
ohmxmal C(OC)C(O)
ccmxmal C(OC)=C
redmxmal C(OC)C

# ethyl variants
emal C(CC)C(=O)
ohemal C(CC)C(O)
ccemal C(CC)=C
redemal C(CC)C

# MINOWA predictions
AHBA C1=CC(=O)C(N)C=C1C(=O)O
ohAHBA C1=CC(O)C(N)C=C1C(=O)O  # possible modification by a KR, consistent with PKS substrate modification naming
fatty_acid C[*]C(=O)O
NH2 N
Acetyl-CoA CC(=O)O
shikimic_acid C1C(O)C(O)C(O)C=C1C(=O)O
