Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions scripts/conformer_filter_density/ReadMe.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Conformer Filter Density

Filter conformers using a variety of metrics. See arguments in script for details.

CCDC Python API Licence required, minimum version: 3.0.15
Comment on lines +1 to +5
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you please expan on this Readme to include the sections that is in the template https://github.com/ccdc-opensource/csd-python-api-scripts/tree/main/scripts/new_script_readme_template

189 changes: 189 additions & 0 deletions scripts/conformer_filter_density/conformer_filter_density.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
#!/usr/bin/env python3
#
# This script can be used for any purpose without limitation subject to the
# conditions at https://www.ccdc.cam.ac.uk/Community/Pages/Licences/v2.aspx
#
# This permission notice and the following statement of attribution must be
# included in all copies or substantial portions of this script.
#
# 2026-02-03: created by the Cambridge Crystallographic Data Centre

import argparse
import csv

from ccdc import conformer, io


def parse_args():
"""Parse command line arguments."""
parser = argparse.ArgumentParser(description=__doc__)

parser.add_argument('inmolfn',
metavar='<input molecule file>',
help='Input file (single- or multi-molecule file)')

parser.add_argument('-m', '--mode',
choices=['absolute', 'relative'],
default='absolute',
help='Limit mode: absolute (fixed threshold) or relative '
'(threshold based on molecule with fewest unusual torsions). '
'WARNING: Relative mode may behave unexpectedly with conformers '
'from multiple input molecules (default: %(default)s)')

parser.add_argument('-l', '--limit',
dest='torsion_limit',
type=int,
default=0,
metavar='<limit>',
help='Maximum number of unusual torsions for a passing molecule '
'(default: %(default)s)')

parser.add_argument('-d', '--local-density',
dest='local_density_threshold',
type=float,
default=10.0,
metavar='<threshold>',
help='Local density threshold for classifying a torsion as unusual '
'(default: %(default)s)')

parser.add_argument('--incl-organometallics',
dest='incl_organometallics',
action='store_true',
help='Include organometallic compounds in the search '
'(default: organic compounds only)')

parser.add_argument('--generalisation',
action='store_true',
help='Turn on generalisation for searches')

parser.add_argument('--successfn',
default='successes.mol',
metavar='<file>',
help='Output file for molecules that pass the filter '
'(default: %(default)s)')

parser.add_argument('--failurefn',
default='failures.mol',
metavar='<file>',
help='Output file for molecules that fail the filter '
'(default: %(default)s)')

parser.add_argument('-u', '--unusual-torsions',
dest='unusualtorsionfn',
default='unusual_torsions.csv',
metavar='<file>',
help='Output CSV file for unusual torsion details '
'(default: %(default)s)')

return parser.parse_args()


def create_mogul_engine(local_density_threshold, incl_organometallics, generalisation):
"""Create and configure a geometry analyser engine.

Args:
local_density_threshold: Threshold for classifying torsions as unusual
incl_organometallics: Whether to include organometallic compounds
generalisation: Whether to enable generalisation for searches

Returns:
Configured ccdc.conformer.GeometryAnalyser instance
"""
engine = conformer.GeometryAnalyser()

engine.settings.bond.analyse = False
engine.settings.angle.analyse = False
engine.settings.ring.analyse = False

engine.settings.torsion.local_density_threshold = local_density_threshold
engine.settings.generalisation = generalisation
engine.settings.organometallic_filter = 'all' if incl_organometallics else 'organics_only'

return engine


def analysis(torsion_limit, input_filename, mode, engine, success_file, failure_file, unusual_torsion_file):
"""Analyze molecules for unusual torsions and filter based on criteria.

Args:
torsion_limit: Maximum number of unusual torsions allowed
input_filename: Path to input molecule file
mode: 'absolute' or 'relative' filtering mode
engine: Configured GeometryAnalyser instance
success_file: Path to output file for passing molecules
failure_file: Path to output file for failing molecules
unusual_torsion_file: Path to CSV file for unusual torsion details
"""
# Analyze all molecules and collect unusual torsion data
molecules = []
min_unusual_torsions = float('inf')

with io.MoleculeReader(input_filename) as mol_reader:
for molecule in mol_reader:
molecule.standardise_aromatic_bonds()
molecule.standardise_delocalised_bonds()

geometry_analysed_molecule = engine.analyse_molecule(molecule)

molecule.unusual_torsions = [
t for t in geometry_analysed_molecule.analysed_torsions
if t.unusual and t.enough_hits
]
molecule.num_unusual_torsions = len(molecule.unusual_torsions)
molecules.append(molecule)

min_unusual_torsions = min(min_unusual_torsions, molecule.num_unusual_torsions)

# Write results
with io.MoleculeWriter(success_file) as passed_writer, \
io.MoleculeWriter(failure_file) as failed_writer, \
open(unusual_torsion_file, 'w', newline='') as csv_file:

csv_writer = csv.writer(csv_file)
csv_writer.writerow(['MoleculeIndex', 'Value', 'Zscore', 'LocalDensity', 'NumHits', 'Atoms'])

for idx, molecule in enumerate(molecules):
threshold = torsion_limit if mode == 'absolute' else min_unusual_torsions + torsion_limit
failed = molecule.num_unusual_torsions > threshold

if failed:
failed_writer.write(molecule)
for torsion in molecule.unusual_torsions:
csv_writer.writerow([
idx,
torsion.value,
torsion.z_score,
torsion.local_density,
torsion.nhits,
' '.join(torsion.atom_labels)
])
else:
passed_writer.write(molecule)


def run():
"""Main entry point for the script."""
args = parse_args()

if args.torsion_limit < 0:
raise ValueError('Torsion limit must be >= 0')

engine = create_mogul_engine(
args.local_density_threshold,
args.incl_organometallics,
args.generalisation
)

analysis(
args.torsion_limit,
args.inmolfn,
args.mode,
engine,
args.successfn,
args.failurefn,
args.unusualtorsionsfn,
)


if __name__ == '__main__':
run()
5 changes: 5 additions & 0 deletions scripts/filter_poses/ReadMe.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Filter Poses

This is a short script to filter molecular poses in a multi-molecule file based on the torsion probabilities.

CCDC Python API Licence required, minimum version: 3.0.15
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as previous comment please :)

And can you add the scripts to the ReadMe in the script folder too please :)

189 changes: 189 additions & 0 deletions scripts/filter_poses/filter_poses.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
#!/usr/bin/env python3
#
# This script can be used for any purpose without limitation subject to the
# conditions at https://www.ccdc.cam.ac.uk/Community/Pages/Licences/v2.aspx
#
# This permission notice and the following statement of attribution must be
# included in all copies or substantial portions of this script.
#
# 2025-02-03: created by the Cambridge Crystallographic Data Centre

import argparse
import copy
import csv
import math

from ccdc import conformer, io


def parse_args():
"""Parse command line arguments."""
parser = argparse.ArgumentParser(description=__doc__)

parser.add_argument('conformer_file',
metavar='<input file>',
help='Input file (multi-molecule file)')

parser.add_argument('-csv', '--write-csv',
dest='write_csv',
action='store_true',
help='Write a csv file for all the analysed conformers.')

return parser.parse_args()


class ProbabilityScorer:
"""
Use the ConformerGenerator and GeometryAnalyser to score conformers based on their conformational
probabilities and unusual torsions.
"""
def __init__(self, user_conformer_generator_settings=None, skip_minimisation=True,
user_mogul_analysis_settings=None):

self._generator = self._create_conformer_generator(user_conformer_generator_settings, skip_minimisation)
self._mogul_analysis_engine = self._create_analyser(user_mogul_analysis_settings)

def _create_analyser(self, user_mogul_analyser_settings):
"""Create a GeometryAnalyser engine to analyse the conformers."""
engine = conformer.GeometryAnalyser()

# Tweak the 'default defaults'
# By default, in this use case, we do not want to use generalisation.
engine.settings.generalisation = False
# By default, only the organic subset, i.e. exclude organometallic
engine.settings.organometallic_filter = 'Organic'

# Ensure user settings are kept:
if user_mogul_analyser_settings is not None:
engine.settings = copy.copy(user_mogul_analyser_settings)

# Only analyse torsions:
engine.settings.bond.analyse = False
engine.settings.angle.analyse = False
engine.settings.ring.analyse = False

return engine

def _create_conformer_generator(self, user_generator_settings, skip_minimisation=True):
"""Create a ConformerGenerator engine to generate conformers for the molecules."""
settings = conformer.ConformerSettings()
if user_generator_settings is not None:
settings = copy.copy(user_generator_settings)

# Mandatory setting here: this must work in use_input_torsion_distributions mode.
settings.use_input_torsion_distributions = True

engine = conformer.ConformerGenerator(settings, skip_minimisation)
return engine

@staticmethod
def _combined_local_density(all_local_densities):
if all_local_densities:
return sum(all_local_densities) / len(all_local_densities)
return 100.0

def probability_analysis(self, molecule, bad_normalised_score=1.0, bad_probability_score=0.0,
bad_rmsd_score=9999.9):

# Approximation excluding rings:
is_rigid = sum(bond.is_rotatable for bond in molecule.bonds) == 0

conformers = self._generator.generate(molecule)

normalised_score = None
rmsd = None
ln_probability = None

if conformers:
# First conformer is the most likely, so return its score.
normalised_score = conformers[0].normalised_score
rmsd = conformers[0].rmsd()
ln_probability = conformers[0].probability

if is_rigid:
if ln_probability is None:
ln_probability = 0.0
if normalised_score is None:
normalised_score = 0.0

if normalised_score is None:
normalised_score = bad_normalised_score

if rmsd is None:
rmsd = bad_rmsd_score

probability = bad_probability_score
if ln_probability is not None:
probability = math.exp(ln_probability)

return normalised_score, probability, rmsd

def unusual_torsions_analysis(self, molecule, max_hist_size=15):
checked_mol = self._mogul_analysis_engine.analyse_molecule(molecule)
unusual_count = 0
local_densities = []
unusual_local_densities = []
no_data_torsions = 0
for tor in checked_mol.analysed_torsions:
hist_size = sum(tor.histogram())
if hist_size > max_hist_size:
local_densities.append(tor.local_density)

if tor.unusual:
unusual_local_densities.append(tor.local_density)
unusual_count += 1
else:
no_data_torsions += 1

# Expected number of torsions within 10 degrees assuming even distribution of observations.
# Number of Mogul bins would be strictly better than assuming 18.
uniform_torsion_dist = 100 / 18
local_densities.append(uniform_torsion_dist)
return unusual_count, self._combined_local_density(local_densities), self._combined_local_density(
unusual_local_densities), no_data_torsions

def process_molecule(self, molecule):
molecule.remove_unknown_atoms()
molecule.assign_bond_types()

unusual_count, avg_local_density, avg_unusual_local_density, no_data_torsions = self.unusual_torsions_analysis(
molecule)
normalised_score, probability, rmsd = self.probability_analysis(molecule)

return {
"identifier": molecule.identifier,
"number of unusual torsions": unusual_count,
"average local density": avg_local_density,
"average unusual local density": avg_unusual_local_density,
"number of torsions with no data": no_data_torsions,
"normalised probability score": normalised_score,
"probability score": probability,
"RMSD to input conformation": rmsd,
}


def run():
args = parse_args()

p = ProbabilityScorer()

all_data = []

with io.MoleculeReader(args.conformer_file) as reader:
for i, molecule in enumerate(reader):
data = p.process_molecule(molecule)
if i == 0:
print(", ".join(data.keys()))
print(", ".join(str(value) for value in data.values()))
all_data.append(data)

if args.write_csv:
keys = all_data[0].keys()
with open('filtered_poses_analysis.csv', 'w', newline='') as output_file:
dict_writer = csv.DictWriter(output_file, keys)
dict_writer.writeheader()
dict_writer.writerows(all_data)


if __name__ == "__main__":
run()
Loading