"""Feature of Molecular System --- :mod:`molann.feature`
==========================================================
:Author: Wei Zhang
:Year: 2022
:Copyright: GNU Public License v3
This module implements a class that defines a feature of a molecular system
(:class:`molann.feature.Feature`), and a class that constructs a list of
features from a feature file (read by :class:`molann.feature.FeatureFileReader`).
Classes
-------
.. autoclass:: Feature
:members:
.. autoclass:: FeatureFileReader
:members:
"""
import torch
import pandas as pd
[docs]class Feature(object):
r"""Feature of system
:param str name: feature's name
:param str feature_type: currently supported values: 'angle', 'bond', 'dihedral', and 'position'
:param atom_group: atom group used to define a feature
:type atom_group: :external+mdanalysis:class:`MDAnalysis.core.groups.AtomGroup`
:raises AssertionError: if the number of atoms in **atom_group** does not match **feature_type**.
Note:
For **feature_type** = 'angle', 'bond', and 'dihedral', **atom_group** must contain 3 atoms, 2 atoms, and 4 atoms, respectively.
Examples:
.. code-block:: python
# package MDAnalysis is required
import MDAnalysis as mda
from molann.feature import Feature
# pdb file of the system
pdb_filename = '/path/to/system.pdb'
ref = mda.Universe(pdb_filename)
# feature that is the angle formed by the atoms whose (1-based) indices are 1, 2 and 3.
f1 = Feature('some name', 'angle', ref.select_atoms('bynum 1 2 3'))
print (f1.get_feature_info())
# feature that is the bond distance between the two atoms whose (1-based) indices are 1 and 2.
f2 = Feature('some name', 'bond', ref.select_atoms('bynum 1 2'))
# dihedral angle formed by atoms whose (1-based) indices are 1, 2, 3, and 4.
f3 = Feature('some name', 'dihedral', ref.select_atoms('bynum 1 2 3 4'))
# coordinates of atoms whose (1-based) indices are 3 and 5.
f4 = Feature('some name', 'position', ref.select_atoms('bynum 3 5'))
Note:
:external+mdanalysis:meth:`MDAnalysis.core.universe.Universe.select_atoms()` returns an atom group that may not preserve the order of atoms.
To construct a feature that defines the dihedral angle formed by atoms whose indices are 2,1,3, and 4, we can define the atom group by concatenation:
.. code-block:: python
ag = ref.select_atoms('bynum 2') + ref.select_atoms('bynum 1') + ref.select_atoms('bynum 3 and 4')
f = Feature('some name', 'dihedral', ag)
Attributes:
name: string, name of the feature
type_name: string, whose value is either 'angle', 'bond', 'dihedral', or 'position'
type_id: integer, 0 if type_name='angle', 1 if type_name='bond', 2 if type_name='dihedral', 3 if type_name='position'
atom_group: :external+mdanalysis:class:`MDAnalysis.core.groups.AtomGroup` used to define the feature
"""
def __init__(self, name, feature_type, atom_group):
if feature_type not in ['angle', 'bond', 'dihedral', 'position']:
raise NotImplementedError(f'feature {feature_type} not implemented!')
if len(set(atom_group)) < len(atom_group) :
raise IndexError(f'atom group contains repeated elements!')
if feature_type == 'angle':
assert len(atom_group)==3, '3 atoms are needed to define an angle feature, {} provided'.format(len(atom_group))
type_id = 0
if feature_type == 'bond':
assert len(atom_group)==2, '2 atoms are needed to define a bond length feature, {} provided'.format(len(atom_group))
type_id = 1
if feature_type == 'dihedral':
assert len(atom_group)==4, '4 atoms are needed to define a dihedral angle feature, {} provided'.format(len(atom_group))
type_id = 2
if feature_type == 'position':
type_id = 3
self.name = name
self.type_name = feature_type
self.atom_group = atom_group
self.type_id = type_id
[docs] def get_name(self):
"""
Returns:
:attr:`name`
"""
return self.name
[docs] def get_type(self):
"""
Returns:
:attr:`type_name`
"""
return self.type_name
[docs] def get_atom_indices(self):
"""
Returns:
numpy array of int, (1-based) indices of atoms in the atom group.
"""
return self.atom_group.ix+1
[docs] def get_type_id(self):
"""
Returns:
:attr:`type_id`
"""
return self.type_id
[docs] def get_feature_info(self):
"""
Returns:
:external+pandas:class:`pandas.DataFrame`, which contains feature's information
"""
return pd.DataFrame({'name': self.name, 'type': self.type_name, 'type_id': self.type_id, 'atom indices (1-based)': [self.get_atom_indices()]})
[docs]class FeatureFileReader(object):
r"""Read features from file
:param str feature_file: name of the feature file
:param str section_name: name of the section in the file from which to read features
:param universe: universe that defines the system
:type universe: :external+mdanalysis:class:`MDAnalysis.core.universe.Universe`
Note:
A feature file is a normal text file.
Each section describes a list of features.
A section begins with a line of content '[section_name]', and ends
with a line of content '[End]' (case sensitive). The function :meth:`read` constructs a list of :class:`Feature` from the section with
the corresponding section_name.
Lines that describe a feature contain several fields, seperated by
comma. The first and the second fields specify the `name` and the
`type_name` of the feature, respectively. The remaining fields specify
the strings for selection to define an atom group (by concatenation). See :external+mdanalysis:mod:`MDAnalysis.core.selection`.
Lines starting with '#' are comment lines and they are not processed.
Examples:
Below is an example of feature file, named as *feature.txt*.
.. code-block:: text
# This is a comment line.
# Lines that describe a feature contain several fields, seperated by comma.
# The first and the second fields specify the name and the
# type_name of the feature, respectively. The remaining fields specify
# the strings for selection to define an atom group by concatenation.
# Note: to keep the order of atoms, use 'bynum 5, bynum 2', instead of 'bynum 5 2'
[Preprocessing]
#position, type C or type O or type N
p1, position, resid 2
[End]
[Histogram]
d1, dihedral, bynum 5, bynum 7, bynum 9, bynum 15
d2, dihedral, bynum 7, bynum 9, bynum 15, bynum 17
b1, bond, bynum 2 5
b2, bond, bynum 5 6
a1, angle, bynum 20, bynum 19, bynum 21
a2, angle, bynum 16, bynum 15, bynum 17
[End]
[Output]
d1, dihedral, bynum 5 7 9 15
d2, dihedral, bynum 7 9 15 17
[End]
The following code constructs a list of features from the section 'Histogram', and a list of features from section 'Preprocessing'.
.. code-block:: python
# package MDAnalysis is required
import MDAnalysis as mda
from molann.feature import FeatureFileReader
# pdb file of the system
pdb_filename = '/path/to/system.pdb'
ref = mda.Universe(pdb_filename)
# read features from the section 'Histogram'
feature_reader = FeatureFileReader('feature.txt', 'Histogram', ref)
feature_list_1 = feature_reader.read()
# read features from the section 'Preprocessing'
feature_reader = FeatureFileReader('feature.txt', 'Preprocessing', ref)
feature_list_2 = feature_reader.read()
"""
def __init__(self, feature_file, section_name, universe):
self.feature_file = feature_file
self.section_name = section_name
self.u = universe
[docs] def read(self):
"""
read features from file
Returns:
list of :class:`Feature`, a list of features constructed from the corresponding section of the feature file
"""
self.feature_list = []
pp_cfg_file = open(self.feature_file, "r")
in_section = False
for line in pp_cfg_file:
line = line.strip()
if not line or line.startswith("#") :
continue
if line.startswith("["):
if line.strip('[]') == self.section_name :
in_section = True
continue
if in_section and line.strip('[]') == 'End':
break
if in_section :
ag = None
feature_name, feature_type, *selector_list = line.split(',')
for selector in selector_list:
if ag is None :
ag = self.u.select_atoms(selector)
else :
ag = ag + self.u.select_atoms(selector)
feature = Feature(feature_name.strip(), feature_type.strip(), ag)
self.feature_list.append(feature)
pp_cfg_file.close()
return self.feature_list
[docs] def get_feature_list(self):
"""
Returns:
list of :class:`Feature`, feature list constructed by :meth:`read`
"""
return self.feature_list
[docs] def get_num_of_features(self):
"""
Returns:
int, number of features in the feature list
"""
return len(self.feature_list)
[docs] def get_feature_info(self):
"""
Returns:
:external+pandas:class:`pandas.DataFrame`, which contains information of all features (each row describes a feature)
"""
df = pd.DataFrame()
for f in self.feature_list:
f_info = f.get_feature_info()
df = pd.concat([df, f_info], ignore_index=True)
return df