pyfock.Mol

  1# Mol.py
  2# Author: Manas Sharma ([email protected])
  3# This is a part of CrysX (https://bragitoff.com/crysx)
  4#
  5#
  6#  .d8888b.                            Y88b   d88P       8888888b.           8888888888                888      
  7# d88P  Y88b                            Y88b d88P        888   Y88b          888                       888      
  8# 888    888                             Y88o88P         888    888          888                       888      
  9# 888        888d888 888  888 .d8888b     Y888P          888   d88P 888  888 8888888  .d88b.   .d8888b 888  888 
 10# 888        888P"   888  888 88K         d888b          8888888P"  888  888 888     d88""88b d88P"    888 .88P 
 11# 888    888 888     888  888 "Y8888b.   d88888b  888888 888        888  888 888     888  888 888      888888K  
 12# Y88b  d88P 888     Y88b 888      X88  d88P Y88b        888        Y88b 888 888     Y88..88P Y88b.    888 "88b 
 13#  "Y8888P"  888      "Y88888  88888P' d88P   Y88b       888         "Y88888 888      "Y88P"   "Y8888P 888  888 
 14#                         888                                            888                                    
 15#                    Y8b d88P                                       Y8b d88P                                    
 16#                     "Y88P"                                         "Y88P"                                       
 17from . import Data
 18from . import Basis
 19import numpy as np
 20#Class to store molecular properties
 21class Mol:
 22    """
 23    Class to store molecular properties and handle molecular structure operations.
 24    
 25    This class provides functionality for creating, manipulating, and exporting molecular
 26    structures. It supports reading from various coordinate file formats and automatically
 27    generates basis set information.
 28    
 29    Attributes:
 30        coordfile (str): Path to coordinate file
 31        atoms (list): List of atoms with coordinates
 32        charge (int): Molecular charge
 33        basis (Basis): Basis set object for the molecule
 34        nelectrons (int): Number of electrons
 35        natoms (int): Number of atoms
 36        coords (numpy.ndarray): Atomic coordinates in Angstroms
 37        coordsBohrs (numpy.ndarray): Atomic coordinates in Bohr units
 38        atomicSpecies (list): List of atomic symbols
 39        Zcharges (list): List of atomic numbers
 40        success (bool): Whether molecule was initialized successfully
 41        label (str): Molecular label/description
 42    """
 43    def __init__(self, atoms=None, coordfile=None, charge=0, basis=None):
 44        """
 45        Initialize a Mol object with atomic coordinates and properties.
 46        
 47        Args:
 48            atoms (list, optional): List of atoms in the format [[symbol, x, y, z], ...] 
 49                                   where symbol can be atomic symbol (str) or atomic number (int),
 50                                   and x, y, z are coordinates in Angstroms.
 51            coordfile (str, optional): Path to coordinate file (.xyz format supported).
 52            charge (int, optional): Molecular charge. Defaults to 0.
 53            basis (dict, optional): Basis set specification. If None, defaults to STO-3G 
 54                                   for all atoms.
 55        
 56        Raises:
 57            ValueError: If neither atoms nor coordfile is provided.
 58            TypeError: If coordfile is not a string.
 59        
 60        Note:
 61            If both atoms and coordfile are provided, atoms takes precedence and 
 62            coordfile is ignored.
 63        """ 
 64        #If no atoms or coordfile is specified.
 65        if atoms is None and coordfile is None:
 66            print('Error: No atoms and their coords specified.\nPlease either specify .xyz/.mol/coord file or enter atoms manually.')
 67            print('See manual for supported file formats.')
 68        #If both atoms and coordfile are specified then atoms are considered and the coord file is not considered.
 69        elif not coordfile is None and not atoms is None:
 70            print('Remark: User provided both atoms as well as the coordfile.\nOnly atoms are being used.')
 71        elif coordfile is not None and atoms is None:
 72            if isinstance(coordfile, str):
 73                atoms = self.readCoordsFile(coordfile)
 74            else:
 75                print('Error: The coordfile argument should be a string with the name of the coord file.')
 76
 77        
 78        #Coord file .xyz/.mol/TURBOMOLE coord file/QE input file/Orca input file
 79        self.coordfile = coordfile
 80        """str or None: Path to the coordinate file used to initialize the molecule.
 81        Contains the filename of the original coordinate file (.xyz, .mol, TURBOMOLE coord, etc.) 
 82        if the molecule was created from a file, or None if created directly from atoms list."""
 83        #Atoms and their coordinates
 84        self.atoms = atoms
 85        """list: List of atoms with their coordinates in the format [[symbol, x, y, z], ...] where:
 86        - symbol: Atomic symbol (str) or atomic number (int)
 87        - x, y, z: Cartesian coordinates in Angstroms (float)
 88        This is the raw input data used to construct the molecule."""
 89        #Molecular charge
 90        self.charge = charge
 91        """int: Net charge of the molecule in elementary charge units.
 92        Positive values indicate cationic species, negative values indicate anionic species, 
 93        and 0 indicates a neutral molecule. Used to calculate the total number of electrons."""
 94        #The basis object for the Mol object
 95        self.basis = None
 96        """Basis or None: Basis set object containing all basis function information for quantum 
 97        chemical calculations. Initialized as None and populated with a Basis instance that 
 98        handles basis set assignments for each atom in the molecule."""
 99
100        self.nelectrons = 0
101        """int: Total number of electrons in the molecule.
102        Calculated as the sum of all atomic numbers (nuclear charges) adjusted by the molecular charge:
103        nelectrons = Σ(Z_i) - charge, where Z_i is the atomic number of atom i."""
104        self.natoms = 0
105        """int: Total number of atoms in the molecule.
106        Incremented during atom validation and used for array dimensioning and iteration 
107        over atomic properties."""
108        self.coords = []
109        """numpy.ndarray: Atomic coordinates in Angstroms with shape (natoms, 3).
110        Each row contains the [x, y, z] coordinates of one atom. Initialized as empty list 
111        and populated during atom validation to become a numpy array."""
112        self.coordsBohrs = []
113        """numpy.ndarray: Atomic coordinates converted to Bohr units with shape (natoms, 3).
114        Calculated from self.coords using the conversion factor Data.Angs2BohrFactor. 
115        Used for quantum chemical calculations requiring atomic units of length."""
116        self.atomicSpecies = []
117        """list of str: List of atomic symbols as strings (e.g., ['H', 'C', 'N', 'O']).
118        Ordered the same as atoms appear in the molecule. Length equals natoms. 
119        Used for identifying atom types and output formatting."""
120
121        self.Zcharges = []
122        """list of int: List of atomic numbers (nuclear charges) for each atom in the molecule.
123        Corresponds to the number of protons in each atomic nucleus. Used for calculating 
124        electronic properties and nuclear contributions to various molecular properties."""
125        self.success = False
126        """bool: Flag indicating whether the molecule was successfully initialized and validated.
127        Set to True if all atoms have valid symbols/atomic numbers and properly formatted 
128        coordinates, False otherwise. Used to prevent operations on invalid molecular data."""
129
130        self.label = 'Generic mol'
131        """str: Descriptive label or name for the molecule.
132        Defaults to 'Generic mol' but can be customized. Used in output files and for 
133        identification purposes when exporting molecular data to various file formats."""
134        #Validate if the atoms attribute are provided correctly
135        self.success = self.validateAtoms(atoms)
136        if self.success:
137            self.nelectrons = self.nelectrons+charge
138            self.coordsBohrs = self.coords*Data.Angs2BohrFactor
139            #Basis set names for the atoms.
140            #Either just one basis function can be specified for all atoms
141            #Or one can specify a particular basis set for a particular atom.
142            #'basis' is a dictionary specifying the basis set to be used for a particular atom
143            if basis is None:
144                #Default basis set is sto-3g for all atoms
145                self.basis = {'all':Basis.load(mol=self, basis_name='sto-3g')}#{'all','sto-3g'}
146                self.basis = Basis(self, self.basis)
147            else:
148                self.basis = Basis(self, basis)
149        
150
151        
152        
153
154    def validateAtoms(self, atoms):
155        """
156        Validate atomic symbols and coordinates provided in the atoms list.
157        
158        This method checks that atomic symbols are valid (either as strings matching
159        known element symbols or as integers representing atomic numbers), and that
160        coordinates are properly formatted as numerical values.
161        
162        Args:
163            atoms (list): List of atoms in format [[symbol, x, y, z], ...] where
164                         symbol can be str (element symbol) or int (atomic number),
165                         and x, y, z are numerical coordinates.
166        
167        Returns:
168            bool: True if all atoms are valid, False otherwise.
169        
170        Side Effects:
171            Updates the following instance attributes:
172            - atomicSpecies: List of atomic symbols
173            - Zcharges: List of atomic numbers
174            - nelectrons: Total number of electrons
175            - natoms: Number of atoms
176            - coords: Numpy array of coordinates
177        
178        Raises:
179            Prints error messages for invalid atomic symbols, coordinates, or formatting.
180        """
181        #Validate atomic symbols
182        for i in range(len(atoms)):
183            if isinstance(atoms[i][0], str):
184                elementSymbols = [x.lower() for x in Data.elementSymbols]
185                if atoms[i][0].lower() in elementSymbols:
186                    self.atomicSpecies.append(atoms[i][0])
187                    Z = elementSymbols.index(atoms[i][0].lower())
188                    self.Zcharges.append(Z)
189                    self.nelectrons = self.nelectrons + Z 
190                    self.natoms = self.natoms + 1
191                else:
192                    print('Error: Unknown atomic symbols found')
193                    return False
194            elif isinstance(atoms[i][0], int):
195                if atoms[i][0]<=118 and atoms[i][0]>=0:
196                    self.atomicSpecies.append(Data.elementSymbols[atoms[i][0]])
197                    Z = atoms[i][0]
198                    self.Zcharges.append(Z)
199                    self.nelectrons = self.nelectrons + Z 
200                    self.natoms = self.natoms + 1
201                else:
202                    print('Error: Found atomic number out of the valid range')
203                    return False
204            else:
205                print('Error: Found something other than string or integer for atomic symbols/numbers')
206                return False
207        self.coords = np.zeros([self.natoms,3])
208        #Validate atomic coords
209        for i in range(len(atoms)):
210            if len(atoms[i])==4:
211                #TODO: the following condition doesn't work
212                #FIXME
213                if ( isinstance(coordi, float) for coordi in atoms[i][1:4] ) or ( isinstance(coordi, int) for coordi in atoms[i][1:4]):
214                    self.coords[i] = atoms[i][1:4]
215                else:
216                    print('Error: Coordinates of atoms should be floats/integers.')
217                    return False
218
219            else:
220                print('Error: Illegal definition of coords in atoms. There should be all three x,y,z coordinates.\nSome seem to be missing.')
221                return False
222        
223        if self.natoms==self.coords.shape[0]:
224            return True
225        else:
226            print('Error: Some definitions of atomic symbols/coords were illegal')
227            
228
229    def get_center_of_charge(self, units='angs'):
230        """
231        Calculate the center of charge (weighted by atomic numbers) of the molecule.
232        
233        The center of charge is computed as the weighted average of atomic positions,
234        where the weights are the atomic numbers (nuclear charges) of each atom.
235        
236        Returns:
237            numpy.ndarray or None: 3D coordinates of the center of charge in Angstroms.
238                                  Returns None if the molecule was not successfully initialized.
239        
240        Formula:
241            center_of_charge = Σ(Z_i * r_i) / Σ(Z_i)
242            where Z_i is the atomic number and r_i is the position of atom i.
243        """
244        if not self.success:
245            print("Error: Mol object is not successfully initialized.")
246            return None
247
248        # Use NumPy for efficient calculations
249        total_charge = np.sum(self.Zcharges)
250        if units=='angs':
251            coc = np.dot(self.Zcharges, self.coords) / total_charge
252        else:
253            coc = np.dot(self.Zcharges, self.coordsBohrs) / total_charge
254
255        return coc
256        # # Use NumPy for efficient calculations
257        # total_mass = np.sum(self.Zcharges)
258        # com_x = np.sum(self.Zcharges * self.coords[:, 0]) / total_mass
259        # com_y = np.sum(self.Zcharges * self.coords[:, 1]) / total_mass
260        # com_z = np.sum(self.Zcharges * self.coords[:, 2]) / total_mass
261
262        # com = np.array([com_x, com_y, com_z])
263        # return com
264    
265    def get_nuc_dip_moment(self):
266        """
267        Calculate the nuclear contribution to the molecular dipole moment.
268        
269        The nuclear dipole moment is computed as the sum of nuclear charges
270        multiplied by their positions in atomic units (Bohr).
271        
272        Returns:
273            numpy.ndarray: 3D vector representing the nuclear dipole moment
274                          in atomic units (e⋅a₀), where each component corresponds
275                          to x, y, z directions.
276        
277        Formula:
278            μ_nuc = Σ(Z_i * r_i)
279            where Z_i is the nuclear charge and r_i is the position in Bohr units.
280        """
281        charges = self.Zcharges
282        coords  = self.coordsBohrs 
283        nuc_dip = np.einsum('i,ix->x', charges, coords)
284        return nuc_dip
285
286    def get_elec_dip_moment(self, dipole_moment_matrix, dmat):
287        """
288        Calculate the electronic contribution to the molecular dipole moment.
289        
290        The electronic dipole moment is computed using the dipole moment integrals
291        and the density matrix from quantum chemical calculations.
292        
293        Args:
294            dipole_moment_matrix (numpy.ndarray): 3D array of dipole moment integrals
295                                                 with shape (3, nbasis, nbasis), where
296                                                 the first dimension corresponds to x, y, z.
297            dmat (numpy.ndarray): Density matrix with shape (nbasis, nbasis).
298        
299        Returns:
300            numpy.ndarray: 3D vector representing the electronic dipole moment
301                          in atomic units (e⋅a₀), where each component corresponds
302                          to x, y, z directions.
303        
304        Formula:
305            μ_elec = Σ_μν P_μν ⟨μ|r|ν⟩
306            where P_μν is the density matrix and ⟨μ|r|ν⟩ are dipole integrals.
307        """
308        elec_dip = np.einsum('xij,ji->x', dipole_moment_matrix, dmat).real
309        return elec_dip
310    
311    def get_dipole_moment(self, dipole_moment_matrix, dmat):
312        """
313        Calculate the total molecular dipole moment.
314        
315        The total dipole moment is computed as the difference between nuclear
316        and electronic contributions: μ_total = μ_nuclear - μ_electronic
317        
318        Args:
319            dipole_moment_matrix (numpy.ndarray): 3D array of dipole moment integrals
320                                                 with shape (3, nbasis, nbasis).
321            dmat (numpy.ndarray): Density matrix from quantum chemical calculation
322                                 with shape (nbasis, nbasis).
323        
324        Returns:
325            numpy.ndarray: 3D vector representing the total molecular dipole moment
326                          in atomic units (e⋅a₀). Each component corresponds to
327                          x, y, z directions.
328        
329        Note:
330            The sign convention follows: μ_total = μ_nuclear - μ_electronic
331            This gives the dipole moment vector pointing from negative to positive charge.
332            
333        TODO: Allow user to specify output units (e.g., Debye).
334        """
335        nuc_dip = self.get_nuc_dip_moment()
336        elec_dip = self.get_elec_dip_moment(dipole_moment_matrix, dmat)
337        mol_dip = nuc_dip - elec_dip
338        # TODO: allow the user to specify units (ex: DEBYE)
339        return mol_dip
340    
341    #Export coord file in TURBOMOLE format
342    def exportCoords(atomicSpecies, coords, filename='coord'):
343        """
344        Export molecular coordinates to TURBOMOLE format.
345        
346        Creates a coordinate file in TURBOMOLE format with atomic positions
347        converted from Angstroms to Bohr units.
348        
349        Args:
350            atomicSpecies (list): List of atomic symbols as strings.
351            coords (numpy.ndarray): Atomic coordinates in Angstroms with shape (natoms, 3).
352            filename (str, optional): Output filename. Defaults to 'coord'.
353        
354        File Format:
355            $coord
356            x_bohr y_bohr z_bohr element_symbol
357            ...
358            $end
359        
360        Note:
361            This is a static method that should be called as a class method.
362            Coordinates are automatically converted from Angstroms to Bohr units.
363        
364        Warning:
365            This method contains a bug - it references undefined variable 'coord'
366            instead of 'coords'. Should be fixed in implementation.
367        """
368        file = open(filename,'w')
369        file.write('$coord\n')
370        for i in range(len(atomicSpecies)):
371            file.write(str(coord[i,0]*Data.Angs2BohrFactor)+'\t'+str(coord[i,1]*Data.Angs2BohrFactor)+'\t'+str(coord[i,2]*Data.Angs2BohrFactor)+'\t'+atomicSpecies[i].lower()+'\n')
372        file.write('$end\n')
373        file.close()
374
375    #Export the geometry in XYZ format
376    def exportXYZ(self, filename='coord', label='Generic Mol generated by CrysX (Python Library)'):
377        """
378        Export molecular geometry to XYZ format file.
379        
380        Creates a standard XYZ format file with atomic coordinates in Angstroms.
381        This method uses the molecule's current atomic species and coordinates.
382        
383        Args:
384            filename (str, optional): Base filename without extension. Defaults to 'coord'.
385                                     The '.xyz' extension will be automatically added.
386            label (str, optional): Comment line for the XYZ file. Defaults to 
387                                  'Generic Mol generated by CrysX (Python Library)'.
388        
389        File Format:
390            number_of_atoms
391            comment_line
392            element_symbol x_coord y_coord z_coord
393            ...
394        
395        Output:
396            Creates a file named '{filename}.xyz' in the current directory.
397        
398        Example:
399            mol.exportXYZ('water', 'H2O molecule')
400            # Creates 'water.xyz' with H2O coordinates
401        """
402        file = open(filename+'.xyz','w')
403        file.write(str(self.natoms)+'\n')
404        file.write(label+'\n')
405        for i in range(self.natoms):
406            file.write(self.atomicSpecies[i]+'\t'+str(self.coords[i,0])+'\t'+str(self.coords[i,1])+'\t'+str(self.coords[i,2])+'\n')
407        file.close()   
408    #Export the geometry in XYZ format
409    def exportXYZs(atomicSpecies, coords, filename='coord', label='Generic Mol generated by CrysX (Python Library)'):
410        """
411        Export molecular geometry to XYZ format (static method version).
412        
413        Static method to create XYZ files from provided atomic species and coordinates,
414        without requiring a Mol object instance.
415        
416        Args:
417            atomicSpecies (list): List of atomic symbols as strings.
418            coords (numpy.ndarray): Atomic coordinates in Angstroms with shape (natoms, 3).
419            filename (str, optional): Base filename without extension. Defaults to 'coord'.
420                                     The '.xyz' extension will be automatically added.
421            label (str, optional): Comment line for the XYZ file. Defaults to 
422                                  'Generic Mol generated by CrysX (Python Library)'.
423        
424        File Format:
425            number_of_atoms
426            comment_line
427            element_symbol x_coord y_coord z_coord
428            ...
429        
430        Warning:
431            This method contains a bug - it references undefined variable 'coord'
432            instead of 'coords'. Should be fixed in implementation.
433        
434        Note:
435            This is a static method and should be called as a class method.
436        """
437        file = open(filename+'.xyz','w')
438        file.write(str(len(atomicSpecies))+'\n')
439        file.write(label+'\n')
440        for i in range(len(atomicSpecies)):
441            file.write(atomicSpecies[i]+'\t'+str(coord[i,0])+'\t'+str(coord[i,1])+'\t'+str(coord[i,2])+'\n')
442        file.close()
443    
444    # Read geometry from TURBOOLE coords file
445    def readCoordsFile(self, filename):
446        """
447        Read molecular coordinates from various file formats.
448        
449        Currently supports XYZ format files. This method serves as a dispatcher
450        to format-specific reading methods based on file extension.
451        
452        Args:
453            filename (str): Path to the coordinate file. File format is determined
454                           by the file extension.
455        
456        Returns:
457            list: List of atoms in the format [[symbol, x, y, z], ...] where
458                  symbol is the atomic symbol and x, y, z are coordinates in Angstroms.
459        
460        Supported Formats:
461            - .xyz: Standard XYZ coordinate format
462        
463        Future Extensions:
464            - .mol: MOL file format
465            - coord: TURBOMOLE coordinate format
466            - Other quantum chemistry input formats
467        
468        Raises:
469            Prints error message if file format is not supported.
470        """
471        if filename.endswith('.xyz'):
472            atoms = self.readXYZ(filename)
473        else:
474            print('Only xyz files for now') 
475        return atoms
476    #Read geometry information from a .xyz file
477    def readXYZ(self, filename):
478        """
479        Read molecular geometry from an XYZ format file.
480        
481        Parses a standard XYZ file and extracts atomic symbols and coordinates.
482        The XYZ format consists of:
483        - Line 1: Number of atoms
484        - Line 2: Comment line (ignored)
485        - Lines 3+: atomic_symbol x_coordinate y_coordinate z_coordinate
486        
487        Args:
488            filename (str): Path to the XYZ file to read.
489        
490        Returns:
491            list: List of atoms in format [[symbol, x, y, z], ...] where:
492                  - symbol (str): Atomic symbol (e.g., 'H', 'C', 'O')
493                  - x, y, z (float): Coordinates in Angstroms
494        
495        File Format Expected:
496            number_of_atoms
497            comment_line
498            element_symbol x_coord y_coord z_coord
499            element_symbol x_coord y_coord z_coord
500            ...
501        
502        Error Handling:
503            - Prints error if the number of atoms read doesn't match the declared count
504            - Assumes coordinates are separated by whitespace
505            - Strips whitespace from all input
506        
507        Example:
508            For a file 'water.xyz':
509            3
510            Water molecule
511            O    0.000000    0.000000    0.000000
512            H    0.000000    0.000000    0.970000
513            H    0.944863    0.000000   -0.242498
514            
515            Returns: [['O', 0.0, 0.0, 0.0], ['H', 0.0, 0.0, 0.97], ['H', 0.944863, 0.0, -0.242498]]
516        """
517        with open(filename, "r") as f:
518            lines = f.readlines()
519
520        if len(lines) < 2:
521            raise ValueError(f"File {filename} does not contain enough lines for XYZ format")
522
523        # Read number of atoms from first line
524        try:
525            natoms = int(lines[0].strip())
526        except ValueError:
527            raise ValueError(f"First line of {filename} must contain an integer atom count")
528
529        # The second line is the comment (can be blank)
530        comment_line = lines[1].rstrip("\n")
531
532        atoms = []
533        for i, line in enumerate(lines[2:2 + natoms], start=3):
534            parts = line.split()
535            if len(parts) < 4:
536                raise ValueError(f"Line {i} in {filename} does not have 4 columns: {line.strip()}")
537            symbol = parts[0]
538            try:
539                x, y, z = map(float, parts[1:4])
540            except ValueError:
541                raise ValueError(f"Invalid coordinates on line {i} in {filename}: {parts[1:4]}")
542            atoms.append([symbol, x, y, z])
543
544        if len(atoms) != natoms:
545            raise ValueError(
546                f"Expected {natoms} atoms, but found {len(atoms)} in {filename}"
547            )
548
549        return atoms
class Mol:
 22class Mol:
 23    """
 24    Class to store molecular properties and handle molecular structure operations.
 25    
 26    This class provides functionality for creating, manipulating, and exporting molecular
 27    structures. It supports reading from various coordinate file formats and automatically
 28    generates basis set information.
 29    
 30    Attributes:
 31        coordfile (str): Path to coordinate file
 32        atoms (list): List of atoms with coordinates
 33        charge (int): Molecular charge
 34        basis (Basis): Basis set object for the molecule
 35        nelectrons (int): Number of electrons
 36        natoms (int): Number of atoms
 37        coords (numpy.ndarray): Atomic coordinates in Angstroms
 38        coordsBohrs (numpy.ndarray): Atomic coordinates in Bohr units
 39        atomicSpecies (list): List of atomic symbols
 40        Zcharges (list): List of atomic numbers
 41        success (bool): Whether molecule was initialized successfully
 42        label (str): Molecular label/description
 43    """
 44    def __init__(self, atoms=None, coordfile=None, charge=0, basis=None):
 45        """
 46        Initialize a Mol object with atomic coordinates and properties.
 47        
 48        Args:
 49            atoms (list, optional): List of atoms in the format [[symbol, x, y, z], ...] 
 50                                   where symbol can be atomic symbol (str) or atomic number (int),
 51                                   and x, y, z are coordinates in Angstroms.
 52            coordfile (str, optional): Path to coordinate file (.xyz format supported).
 53            charge (int, optional): Molecular charge. Defaults to 0.
 54            basis (dict, optional): Basis set specification. If None, defaults to STO-3G 
 55                                   for all atoms.
 56        
 57        Raises:
 58            ValueError: If neither atoms nor coordfile is provided.
 59            TypeError: If coordfile is not a string.
 60        
 61        Note:
 62            If both atoms and coordfile are provided, atoms takes precedence and 
 63            coordfile is ignored.
 64        """ 
 65        #If no atoms or coordfile is specified.
 66        if atoms is None and coordfile is None:
 67            print('Error: No atoms and their coords specified.\nPlease either specify .xyz/.mol/coord file or enter atoms manually.')
 68            print('See manual for supported file formats.')
 69        #If both atoms and coordfile are specified then atoms are considered and the coord file is not considered.
 70        elif not coordfile is None and not atoms is None:
 71            print('Remark: User provided both atoms as well as the coordfile.\nOnly atoms are being used.')
 72        elif coordfile is not None and atoms is None:
 73            if isinstance(coordfile, str):
 74                atoms = self.readCoordsFile(coordfile)
 75            else:
 76                print('Error: The coordfile argument should be a string with the name of the coord file.')
 77
 78        
 79        #Coord file .xyz/.mol/TURBOMOLE coord file/QE input file/Orca input file
 80        self.coordfile = coordfile
 81        """str or None: Path to the coordinate file used to initialize the molecule.
 82        Contains the filename of the original coordinate file (.xyz, .mol, TURBOMOLE coord, etc.) 
 83        if the molecule was created from a file, or None if created directly from atoms list."""
 84        #Atoms and their coordinates
 85        self.atoms = atoms
 86        """list: List of atoms with their coordinates in the format [[symbol, x, y, z], ...] where:
 87        - symbol: Atomic symbol (str) or atomic number (int)
 88        - x, y, z: Cartesian coordinates in Angstroms (float)
 89        This is the raw input data used to construct the molecule."""
 90        #Molecular charge
 91        self.charge = charge
 92        """int: Net charge of the molecule in elementary charge units.
 93        Positive values indicate cationic species, negative values indicate anionic species, 
 94        and 0 indicates a neutral molecule. Used to calculate the total number of electrons."""
 95        #The basis object for the Mol object
 96        self.basis = None
 97        """Basis or None: Basis set object containing all basis function information for quantum 
 98        chemical calculations. Initialized as None and populated with a Basis instance that 
 99        handles basis set assignments for each atom in the molecule."""
100
101        self.nelectrons = 0
102        """int: Total number of electrons in the molecule.
103        Calculated as the sum of all atomic numbers (nuclear charges) adjusted by the molecular charge:
104        nelectrons = Σ(Z_i) - charge, where Z_i is the atomic number of atom i."""
105        self.natoms = 0
106        """int: Total number of atoms in the molecule.
107        Incremented during atom validation and used for array dimensioning and iteration 
108        over atomic properties."""
109        self.coords = []
110        """numpy.ndarray: Atomic coordinates in Angstroms with shape (natoms, 3).
111        Each row contains the [x, y, z] coordinates of one atom. Initialized as empty list 
112        and populated during atom validation to become a numpy array."""
113        self.coordsBohrs = []
114        """numpy.ndarray: Atomic coordinates converted to Bohr units with shape (natoms, 3).
115        Calculated from self.coords using the conversion factor Data.Angs2BohrFactor. 
116        Used for quantum chemical calculations requiring atomic units of length."""
117        self.atomicSpecies = []
118        """list of str: List of atomic symbols as strings (e.g., ['H', 'C', 'N', 'O']).
119        Ordered the same as atoms appear in the molecule. Length equals natoms. 
120        Used for identifying atom types and output formatting."""
121
122        self.Zcharges = []
123        """list of int: List of atomic numbers (nuclear charges) for each atom in the molecule.
124        Corresponds to the number of protons in each atomic nucleus. Used for calculating 
125        electronic properties and nuclear contributions to various molecular properties."""
126        self.success = False
127        """bool: Flag indicating whether the molecule was successfully initialized and validated.
128        Set to True if all atoms have valid symbols/atomic numbers and properly formatted 
129        coordinates, False otherwise. Used to prevent operations on invalid molecular data."""
130
131        self.label = 'Generic mol'
132        """str: Descriptive label or name for the molecule.
133        Defaults to 'Generic mol' but can be customized. Used in output files and for 
134        identification purposes when exporting molecular data to various file formats."""
135        #Validate if the atoms attribute are provided correctly
136        self.success = self.validateAtoms(atoms)
137        if self.success:
138            self.nelectrons = self.nelectrons+charge
139            self.coordsBohrs = self.coords*Data.Angs2BohrFactor
140            #Basis set names for the atoms.
141            #Either just one basis function can be specified for all atoms
142            #Or one can specify a particular basis set for a particular atom.
143            #'basis' is a dictionary specifying the basis set to be used for a particular atom
144            if basis is None:
145                #Default basis set is sto-3g for all atoms
146                self.basis = {'all':Basis.load(mol=self, basis_name='sto-3g')}#{'all','sto-3g'}
147                self.basis = Basis(self, self.basis)
148            else:
149                self.basis = Basis(self, basis)
150        
151
152        
153        
154
155    def validateAtoms(self, atoms):
156        """
157        Validate atomic symbols and coordinates provided in the atoms list.
158        
159        This method checks that atomic symbols are valid (either as strings matching
160        known element symbols or as integers representing atomic numbers), and that
161        coordinates are properly formatted as numerical values.
162        
163        Args:
164            atoms (list): List of atoms in format [[symbol, x, y, z], ...] where
165                         symbol can be str (element symbol) or int (atomic number),
166                         and x, y, z are numerical coordinates.
167        
168        Returns:
169            bool: True if all atoms are valid, False otherwise.
170        
171        Side Effects:
172            Updates the following instance attributes:
173            - atomicSpecies: List of atomic symbols
174            - Zcharges: List of atomic numbers
175            - nelectrons: Total number of electrons
176            - natoms: Number of atoms
177            - coords: Numpy array of coordinates
178        
179        Raises:
180            Prints error messages for invalid atomic symbols, coordinates, or formatting.
181        """
182        #Validate atomic symbols
183        for i in range(len(atoms)):
184            if isinstance(atoms[i][0], str):
185                elementSymbols = [x.lower() for x in Data.elementSymbols]
186                if atoms[i][0].lower() in elementSymbols:
187                    self.atomicSpecies.append(atoms[i][0])
188                    Z = elementSymbols.index(atoms[i][0].lower())
189                    self.Zcharges.append(Z)
190                    self.nelectrons = self.nelectrons + Z 
191                    self.natoms = self.natoms + 1
192                else:
193                    print('Error: Unknown atomic symbols found')
194                    return False
195            elif isinstance(atoms[i][0], int):
196                if atoms[i][0]<=118 and atoms[i][0]>=0:
197                    self.atomicSpecies.append(Data.elementSymbols[atoms[i][0]])
198                    Z = atoms[i][0]
199                    self.Zcharges.append(Z)
200                    self.nelectrons = self.nelectrons + Z 
201                    self.natoms = self.natoms + 1
202                else:
203                    print('Error: Found atomic number out of the valid range')
204                    return False
205            else:
206                print('Error: Found something other than string or integer for atomic symbols/numbers')
207                return False
208        self.coords = np.zeros([self.natoms,3])
209        #Validate atomic coords
210        for i in range(len(atoms)):
211            if len(atoms[i])==4:
212                #TODO: the following condition doesn't work
213                #FIXME
214                if ( isinstance(coordi, float) for coordi in atoms[i][1:4] ) or ( isinstance(coordi, int) for coordi in atoms[i][1:4]):
215                    self.coords[i] = atoms[i][1:4]
216                else:
217                    print('Error: Coordinates of atoms should be floats/integers.')
218                    return False
219
220            else:
221                print('Error: Illegal definition of coords in atoms. There should be all three x,y,z coordinates.\nSome seem to be missing.')
222                return False
223        
224        if self.natoms==self.coords.shape[0]:
225            return True
226        else:
227            print('Error: Some definitions of atomic symbols/coords were illegal')
228            
229
230    def get_center_of_charge(self, units='angs'):
231        """
232        Calculate the center of charge (weighted by atomic numbers) of the molecule.
233        
234        The center of charge is computed as the weighted average of atomic positions,
235        where the weights are the atomic numbers (nuclear charges) of each atom.
236        
237        Returns:
238            numpy.ndarray or None: 3D coordinates of the center of charge in Angstroms.
239                                  Returns None if the molecule was not successfully initialized.
240        
241        Formula:
242            center_of_charge = Σ(Z_i * r_i) / Σ(Z_i)
243            where Z_i is the atomic number and r_i is the position of atom i.
244        """
245        if not self.success:
246            print("Error: Mol object is not successfully initialized.")
247            return None
248
249        # Use NumPy for efficient calculations
250        total_charge = np.sum(self.Zcharges)
251        if units=='angs':
252            coc = np.dot(self.Zcharges, self.coords) / total_charge
253        else:
254            coc = np.dot(self.Zcharges, self.coordsBohrs) / total_charge
255
256        return coc
257        # # Use NumPy for efficient calculations
258        # total_mass = np.sum(self.Zcharges)
259        # com_x = np.sum(self.Zcharges * self.coords[:, 0]) / total_mass
260        # com_y = np.sum(self.Zcharges * self.coords[:, 1]) / total_mass
261        # com_z = np.sum(self.Zcharges * self.coords[:, 2]) / total_mass
262
263        # com = np.array([com_x, com_y, com_z])
264        # return com
265    
266    def get_nuc_dip_moment(self):
267        """
268        Calculate the nuclear contribution to the molecular dipole moment.
269        
270        The nuclear dipole moment is computed as the sum of nuclear charges
271        multiplied by their positions in atomic units (Bohr).
272        
273        Returns:
274            numpy.ndarray: 3D vector representing the nuclear dipole moment
275                          in atomic units (e⋅a₀), where each component corresponds
276                          to x, y, z directions.
277        
278        Formula:
279            μ_nuc = Σ(Z_i * r_i)
280            where Z_i is the nuclear charge and r_i is the position in Bohr units.
281        """
282        charges = self.Zcharges
283        coords  = self.coordsBohrs 
284        nuc_dip = np.einsum('i,ix->x', charges, coords)
285        return nuc_dip
286
287    def get_elec_dip_moment(self, dipole_moment_matrix, dmat):
288        """
289        Calculate the electronic contribution to the molecular dipole moment.
290        
291        The electronic dipole moment is computed using the dipole moment integrals
292        and the density matrix from quantum chemical calculations.
293        
294        Args:
295            dipole_moment_matrix (numpy.ndarray): 3D array of dipole moment integrals
296                                                 with shape (3, nbasis, nbasis), where
297                                                 the first dimension corresponds to x, y, z.
298            dmat (numpy.ndarray): Density matrix with shape (nbasis, nbasis).
299        
300        Returns:
301            numpy.ndarray: 3D vector representing the electronic dipole moment
302                          in atomic units (e⋅a₀), where each component corresponds
303                          to x, y, z directions.
304        
305        Formula:
306            μ_elec = Σ_μν P_μν ⟨μ|r|ν⟩
307            where P_μν is the density matrix and ⟨μ|r|ν⟩ are dipole integrals.
308        """
309        elec_dip = np.einsum('xij,ji->x', dipole_moment_matrix, dmat).real
310        return elec_dip
311    
312    def get_dipole_moment(self, dipole_moment_matrix, dmat):
313        """
314        Calculate the total molecular dipole moment.
315        
316        The total dipole moment is computed as the difference between nuclear
317        and electronic contributions: μ_total = μ_nuclear - μ_electronic
318        
319        Args:
320            dipole_moment_matrix (numpy.ndarray): 3D array of dipole moment integrals
321                                                 with shape (3, nbasis, nbasis).
322            dmat (numpy.ndarray): Density matrix from quantum chemical calculation
323                                 with shape (nbasis, nbasis).
324        
325        Returns:
326            numpy.ndarray: 3D vector representing the total molecular dipole moment
327                          in atomic units (e⋅a₀). Each component corresponds to
328                          x, y, z directions.
329        
330        Note:
331            The sign convention follows: μ_total = μ_nuclear - μ_electronic
332            This gives the dipole moment vector pointing from negative to positive charge.
333            
334        TODO: Allow user to specify output units (e.g., Debye).
335        """
336        nuc_dip = self.get_nuc_dip_moment()
337        elec_dip = self.get_elec_dip_moment(dipole_moment_matrix, dmat)
338        mol_dip = nuc_dip - elec_dip
339        # TODO: allow the user to specify units (ex: DEBYE)
340        return mol_dip
341    
342    #Export coord file in TURBOMOLE format
343    def exportCoords(atomicSpecies, coords, filename='coord'):
344        """
345        Export molecular coordinates to TURBOMOLE format.
346        
347        Creates a coordinate file in TURBOMOLE format with atomic positions
348        converted from Angstroms to Bohr units.
349        
350        Args:
351            atomicSpecies (list): List of atomic symbols as strings.
352            coords (numpy.ndarray): Atomic coordinates in Angstroms with shape (natoms, 3).
353            filename (str, optional): Output filename. Defaults to 'coord'.
354        
355        File Format:
356            $coord
357            x_bohr y_bohr z_bohr element_symbol
358            ...
359            $end
360        
361        Note:
362            This is a static method that should be called as a class method.
363            Coordinates are automatically converted from Angstroms to Bohr units.
364        
365        Warning:
366            This method contains a bug - it references undefined variable 'coord'
367            instead of 'coords'. Should be fixed in implementation.
368        """
369        file = open(filename,'w')
370        file.write('$coord\n')
371        for i in range(len(atomicSpecies)):
372            file.write(str(coord[i,0]*Data.Angs2BohrFactor)+'\t'+str(coord[i,1]*Data.Angs2BohrFactor)+'\t'+str(coord[i,2]*Data.Angs2BohrFactor)+'\t'+atomicSpecies[i].lower()+'\n')
373        file.write('$end\n')
374        file.close()
375
376    #Export the geometry in XYZ format
377    def exportXYZ(self, filename='coord', label='Generic Mol generated by CrysX (Python Library)'):
378        """
379        Export molecular geometry to XYZ format file.
380        
381        Creates a standard XYZ format file with atomic coordinates in Angstroms.
382        This method uses the molecule's current atomic species and coordinates.
383        
384        Args:
385            filename (str, optional): Base filename without extension. Defaults to 'coord'.
386                                     The '.xyz' extension will be automatically added.
387            label (str, optional): Comment line for the XYZ file. Defaults to 
388                                  'Generic Mol generated by CrysX (Python Library)'.
389        
390        File Format:
391            number_of_atoms
392            comment_line
393            element_symbol x_coord y_coord z_coord
394            ...
395        
396        Output:
397            Creates a file named '{filename}.xyz' in the current directory.
398        
399        Example:
400            mol.exportXYZ('water', 'H2O molecule')
401            # Creates 'water.xyz' with H2O coordinates
402        """
403        file = open(filename+'.xyz','w')
404        file.write(str(self.natoms)+'\n')
405        file.write(label+'\n')
406        for i in range(self.natoms):
407            file.write(self.atomicSpecies[i]+'\t'+str(self.coords[i,0])+'\t'+str(self.coords[i,1])+'\t'+str(self.coords[i,2])+'\n')
408        file.close()   
409    #Export the geometry in XYZ format
410    def exportXYZs(atomicSpecies, coords, filename='coord', label='Generic Mol generated by CrysX (Python Library)'):
411        """
412        Export molecular geometry to XYZ format (static method version).
413        
414        Static method to create XYZ files from provided atomic species and coordinates,
415        without requiring a Mol object instance.
416        
417        Args:
418            atomicSpecies (list): List of atomic symbols as strings.
419            coords (numpy.ndarray): Atomic coordinates in Angstroms with shape (natoms, 3).
420            filename (str, optional): Base filename without extension. Defaults to 'coord'.
421                                     The '.xyz' extension will be automatically added.
422            label (str, optional): Comment line for the XYZ file. Defaults to 
423                                  'Generic Mol generated by CrysX (Python Library)'.
424        
425        File Format:
426            number_of_atoms
427            comment_line
428            element_symbol x_coord y_coord z_coord
429            ...
430        
431        Warning:
432            This method contains a bug - it references undefined variable 'coord'
433            instead of 'coords'. Should be fixed in implementation.
434        
435        Note:
436            This is a static method and should be called as a class method.
437        """
438        file = open(filename+'.xyz','w')
439        file.write(str(len(atomicSpecies))+'\n')
440        file.write(label+'\n')
441        for i in range(len(atomicSpecies)):
442            file.write(atomicSpecies[i]+'\t'+str(coord[i,0])+'\t'+str(coord[i,1])+'\t'+str(coord[i,2])+'\n')
443        file.close()
444    
445    # Read geometry from TURBOOLE coords file
446    def readCoordsFile(self, filename):
447        """
448        Read molecular coordinates from various file formats.
449        
450        Currently supports XYZ format files. This method serves as a dispatcher
451        to format-specific reading methods based on file extension.
452        
453        Args:
454            filename (str): Path to the coordinate file. File format is determined
455                           by the file extension.
456        
457        Returns:
458            list: List of atoms in the format [[symbol, x, y, z], ...] where
459                  symbol is the atomic symbol and x, y, z are coordinates in Angstroms.
460        
461        Supported Formats:
462            - .xyz: Standard XYZ coordinate format
463        
464        Future Extensions:
465            - .mol: MOL file format
466            - coord: TURBOMOLE coordinate format
467            - Other quantum chemistry input formats
468        
469        Raises:
470            Prints error message if file format is not supported.
471        """
472        if filename.endswith('.xyz'):
473            atoms = self.readXYZ(filename)
474        else:
475            print('Only xyz files for now') 
476        return atoms
477    #Read geometry information from a .xyz file
478    def readXYZ(self, filename):
479        """
480        Read molecular geometry from an XYZ format file.
481        
482        Parses a standard XYZ file and extracts atomic symbols and coordinates.
483        The XYZ format consists of:
484        - Line 1: Number of atoms
485        - Line 2: Comment line (ignored)
486        - Lines 3+: atomic_symbol x_coordinate y_coordinate z_coordinate
487        
488        Args:
489            filename (str): Path to the XYZ file to read.
490        
491        Returns:
492            list: List of atoms in format [[symbol, x, y, z], ...] where:
493                  - symbol (str): Atomic symbol (e.g., 'H', 'C', 'O')
494                  - x, y, z (float): Coordinates in Angstroms
495        
496        File Format Expected:
497            number_of_atoms
498            comment_line
499            element_symbol x_coord y_coord z_coord
500            element_symbol x_coord y_coord z_coord
501            ...
502        
503        Error Handling:
504            - Prints error if the number of atoms read doesn't match the declared count
505            - Assumes coordinates are separated by whitespace
506            - Strips whitespace from all input
507        
508        Example:
509            For a file 'water.xyz':
510            3
511            Water molecule
512            O    0.000000    0.000000    0.000000
513            H    0.000000    0.000000    0.970000
514            H    0.944863    0.000000   -0.242498
515            
516            Returns: [['O', 0.0, 0.0, 0.0], ['H', 0.0, 0.0, 0.97], ['H', 0.944863, 0.0, -0.242498]]
517        """
518        with open(filename, "r") as f:
519            lines = f.readlines()
520
521        if len(lines) < 2:
522            raise ValueError(f"File {filename} does not contain enough lines for XYZ format")
523
524        # Read number of atoms from first line
525        try:
526            natoms = int(lines[0].strip())
527        except ValueError:
528            raise ValueError(f"First line of {filename} must contain an integer atom count")
529
530        # The second line is the comment (can be blank)
531        comment_line = lines[1].rstrip("\n")
532
533        atoms = []
534        for i, line in enumerate(lines[2:2 + natoms], start=3):
535            parts = line.split()
536            if len(parts) < 4:
537                raise ValueError(f"Line {i} in {filename} does not have 4 columns: {line.strip()}")
538            symbol = parts[0]
539            try:
540                x, y, z = map(float, parts[1:4])
541            except ValueError:
542                raise ValueError(f"Invalid coordinates on line {i} in {filename}: {parts[1:4]}")
543            atoms.append([symbol, x, y, z])
544
545        if len(atoms) != natoms:
546            raise ValueError(
547                f"Expected {natoms} atoms, but found {len(atoms)} in {filename}"
548            )
549
550        return atoms

Class to store molecular properties and handle molecular structure operations.

This class provides functionality for creating, manipulating, and exporting molecular structures. It supports reading from various coordinate file formats and automatically generates basis set information.

Attributes: coordfile (str): Path to coordinate file atoms (list): List of atoms with coordinates charge (int): Molecular charge basis (Basis): Basis set object for the molecule nelectrons (int): Number of electrons natoms (int): Number of atoms coords (numpy.ndarray): Atomic coordinates in Angstroms coordsBohrs (numpy.ndarray): Atomic coordinates in Bohr units atomicSpecies (list): List of atomic symbols Zcharges (list): List of atomic numbers success (bool): Whether molecule was initialized successfully label (str): Molecular label/description

Mol(atoms=None, coordfile=None, charge=0, basis=None)
 44    def __init__(self, atoms=None, coordfile=None, charge=0, basis=None):
 45        """
 46        Initialize a Mol object with atomic coordinates and properties.
 47        
 48        Args:
 49            atoms (list, optional): List of atoms in the format [[symbol, x, y, z], ...] 
 50                                   where symbol can be atomic symbol (str) or atomic number (int),
 51                                   and x, y, z are coordinates in Angstroms.
 52            coordfile (str, optional): Path to coordinate file (.xyz format supported).
 53            charge (int, optional): Molecular charge. Defaults to 0.
 54            basis (dict, optional): Basis set specification. If None, defaults to STO-3G 
 55                                   for all atoms.
 56        
 57        Raises:
 58            ValueError: If neither atoms nor coordfile is provided.
 59            TypeError: If coordfile is not a string.
 60        
 61        Note:
 62            If both atoms and coordfile are provided, atoms takes precedence and 
 63            coordfile is ignored.
 64        """ 
 65        #If no atoms or coordfile is specified.
 66        if atoms is None and coordfile is None:
 67            print('Error: No atoms and their coords specified.\nPlease either specify .xyz/.mol/coord file or enter atoms manually.')
 68            print('See manual for supported file formats.')
 69        #If both atoms and coordfile are specified then atoms are considered and the coord file is not considered.
 70        elif not coordfile is None and not atoms is None:
 71            print('Remark: User provided both atoms as well as the coordfile.\nOnly atoms are being used.')
 72        elif coordfile is not None and atoms is None:
 73            if isinstance(coordfile, str):
 74                atoms = self.readCoordsFile(coordfile)
 75            else:
 76                print('Error: The coordfile argument should be a string with the name of the coord file.')
 77
 78        
 79        #Coord file .xyz/.mol/TURBOMOLE coord file/QE input file/Orca input file
 80        self.coordfile = coordfile
 81        """str or None: Path to the coordinate file used to initialize the molecule.
 82        Contains the filename of the original coordinate file (.xyz, .mol, TURBOMOLE coord, etc.) 
 83        if the molecule was created from a file, or None if created directly from atoms list."""
 84        #Atoms and their coordinates
 85        self.atoms = atoms
 86        """list: List of atoms with their coordinates in the format [[symbol, x, y, z], ...] where:
 87        - symbol: Atomic symbol (str) or atomic number (int)
 88        - x, y, z: Cartesian coordinates in Angstroms (float)
 89        This is the raw input data used to construct the molecule."""
 90        #Molecular charge
 91        self.charge = charge
 92        """int: Net charge of the molecule in elementary charge units.
 93        Positive values indicate cationic species, negative values indicate anionic species, 
 94        and 0 indicates a neutral molecule. Used to calculate the total number of electrons."""
 95        #The basis object for the Mol object
 96        self.basis = None
 97        """Basis or None: Basis set object containing all basis function information for quantum 
 98        chemical calculations. Initialized as None and populated with a Basis instance that 
 99        handles basis set assignments for each atom in the molecule."""
100
101        self.nelectrons = 0
102        """int: Total number of electrons in the molecule.
103        Calculated as the sum of all atomic numbers (nuclear charges) adjusted by the molecular charge:
104        nelectrons = Σ(Z_i) - charge, where Z_i is the atomic number of atom i."""
105        self.natoms = 0
106        """int: Total number of atoms in the molecule.
107        Incremented during atom validation and used for array dimensioning and iteration 
108        over atomic properties."""
109        self.coords = []
110        """numpy.ndarray: Atomic coordinates in Angstroms with shape (natoms, 3).
111        Each row contains the [x, y, z] coordinates of one atom. Initialized as empty list 
112        and populated during atom validation to become a numpy array."""
113        self.coordsBohrs = []
114        """numpy.ndarray: Atomic coordinates converted to Bohr units with shape (natoms, 3).
115        Calculated from self.coords using the conversion factor Data.Angs2BohrFactor. 
116        Used for quantum chemical calculations requiring atomic units of length."""
117        self.atomicSpecies = []
118        """list of str: List of atomic symbols as strings (e.g., ['H', 'C', 'N', 'O']).
119        Ordered the same as atoms appear in the molecule. Length equals natoms. 
120        Used for identifying atom types and output formatting."""
121
122        self.Zcharges = []
123        """list of int: List of atomic numbers (nuclear charges) for each atom in the molecule.
124        Corresponds to the number of protons in each atomic nucleus. Used for calculating 
125        electronic properties and nuclear contributions to various molecular properties."""
126        self.success = False
127        """bool: Flag indicating whether the molecule was successfully initialized and validated.
128        Set to True if all atoms have valid symbols/atomic numbers and properly formatted 
129        coordinates, False otherwise. Used to prevent operations on invalid molecular data."""
130
131        self.label = 'Generic mol'
132        """str: Descriptive label or name for the molecule.
133        Defaults to 'Generic mol' but can be customized. Used in output files and for 
134        identification purposes when exporting molecular data to various file formats."""
135        #Validate if the atoms attribute are provided correctly
136        self.success = self.validateAtoms(atoms)
137        if self.success:
138            self.nelectrons = self.nelectrons+charge
139            self.coordsBohrs = self.coords*Data.Angs2BohrFactor
140            #Basis set names for the atoms.
141            #Either just one basis function can be specified for all atoms
142            #Or one can specify a particular basis set for a particular atom.
143            #'basis' is a dictionary specifying the basis set to be used for a particular atom
144            if basis is None:
145                #Default basis set is sto-3g for all atoms
146                self.basis = {'all':Basis.load(mol=self, basis_name='sto-3g')}#{'all','sto-3g'}
147                self.basis = Basis(self, self.basis)
148            else:
149                self.basis = Basis(self, basis)

Initialize a Mol object with atomic coordinates and properties.

Args: atoms (list, optional): List of atoms in the format [[symbol, x, y, z], ...] where symbol can be atomic symbol (str) or atomic number (int), and x, y, z are coordinates in Angstroms. coordfile (str, optional): Path to coordinate file (.xyz format supported). charge (int, optional): Molecular charge. Defaults to 0. basis (dict, optional): Basis set specification. If None, defaults to STO-3G for all atoms.

Raises: ValueError: If neither atoms nor coordfile is provided. TypeError: If coordfile is not a string.

Note: If both atoms and coordfile are provided, atoms takes precedence and coordfile is ignored.

coordfile

str or None: Path to the coordinate file used to initialize the molecule. Contains the filename of the original coordinate file (.xyz, .mol, TURBOMOLE coord, etc.) if the molecule was created from a file, or None if created directly from atoms list.

atoms

list: List of atoms with their coordinates in the format [[symbol, x, y, z], ...] where:

  • symbol: Atomic symbol (str) or atomic number (int)
  • x, y, z: Cartesian coordinates in Angstroms (float) This is the raw input data used to construct the molecule.
charge

int: Net charge of the molecule in elementary charge units. Positive values indicate cationic species, negative values indicate anionic species, and 0 indicates a neutral molecule. Used to calculate the total number of electrons.

basis

Basis or None: Basis set object containing all basis function information for quantum chemical calculations. Initialized as None and populated with a Basis instance that handles basis set assignments for each atom in the molecule.

nelectrons

int: Total number of electrons in the molecule. Calculated as the sum of all atomic numbers (nuclear charges) adjusted by the molecular charge: nelectrons = Σ(Z_i) - charge, where Z_i is the atomic number of atom i.

natoms

int: Total number of atoms in the molecule. Incremented during atom validation and used for array dimensioning and iteration over atomic properties.

coords

numpy.ndarray: Atomic coordinates in Angstroms with shape (natoms, 3). Each row contains the [x, y, z] coordinates of one atom. Initialized as empty list and populated during atom validation to become a numpy array.

coordsBohrs

numpy.ndarray: Atomic coordinates converted to Bohr units with shape (natoms, 3). Calculated from self.coords using the conversion factor Data.Angs2BohrFactor. Used for quantum chemical calculations requiring atomic units of length.

atomicSpecies

list of str: List of atomic symbols as strings (e.g., ['H', 'C', 'N', 'O']). Ordered the same as atoms appear in the molecule. Length equals natoms. Used for identifying atom types and output formatting.

Zcharges

list of int: List of atomic numbers (nuclear charges) for each atom in the molecule. Corresponds to the number of protons in each atomic nucleus. Used for calculating electronic properties and nuclear contributions to various molecular properties.

success

bool: Flag indicating whether the molecule was successfully initialized and validated. Set to True if all atoms have valid symbols/atomic numbers and properly formatted coordinates, False otherwise. Used to prevent operations on invalid molecular data.

label

str: Descriptive label or name for the molecule. Defaults to 'Generic mol' but can be customized. Used in output files and for identification purposes when exporting molecular data to various file formats.

def validateAtoms(self, atoms):
155    def validateAtoms(self, atoms):
156        """
157        Validate atomic symbols and coordinates provided in the atoms list.
158        
159        This method checks that atomic symbols are valid (either as strings matching
160        known element symbols or as integers representing atomic numbers), and that
161        coordinates are properly formatted as numerical values.
162        
163        Args:
164            atoms (list): List of atoms in format [[symbol, x, y, z], ...] where
165                         symbol can be str (element symbol) or int (atomic number),
166                         and x, y, z are numerical coordinates.
167        
168        Returns:
169            bool: True if all atoms are valid, False otherwise.
170        
171        Side Effects:
172            Updates the following instance attributes:
173            - atomicSpecies: List of atomic symbols
174            - Zcharges: List of atomic numbers
175            - nelectrons: Total number of electrons
176            - natoms: Number of atoms
177            - coords: Numpy array of coordinates
178        
179        Raises:
180            Prints error messages for invalid atomic symbols, coordinates, or formatting.
181        """
182        #Validate atomic symbols
183        for i in range(len(atoms)):
184            if isinstance(atoms[i][0], str):
185                elementSymbols = [x.lower() for x in Data.elementSymbols]
186                if atoms[i][0].lower() in elementSymbols:
187                    self.atomicSpecies.append(atoms[i][0])
188                    Z = elementSymbols.index(atoms[i][0].lower())
189                    self.Zcharges.append(Z)
190                    self.nelectrons = self.nelectrons + Z 
191                    self.natoms = self.natoms + 1
192                else:
193                    print('Error: Unknown atomic symbols found')
194                    return False
195            elif isinstance(atoms[i][0], int):
196                if atoms[i][0]<=118 and atoms[i][0]>=0:
197                    self.atomicSpecies.append(Data.elementSymbols[atoms[i][0]])
198                    Z = atoms[i][0]
199                    self.Zcharges.append(Z)
200                    self.nelectrons = self.nelectrons + Z 
201                    self.natoms = self.natoms + 1
202                else:
203                    print('Error: Found atomic number out of the valid range')
204                    return False
205            else:
206                print('Error: Found something other than string or integer for atomic symbols/numbers')
207                return False
208        self.coords = np.zeros([self.natoms,3])
209        #Validate atomic coords
210        for i in range(len(atoms)):
211            if len(atoms[i])==4:
212                #TODO: the following condition doesn't work
213                #FIXME
214                if ( isinstance(coordi, float) for coordi in atoms[i][1:4] ) or ( isinstance(coordi, int) for coordi in atoms[i][1:4]):
215                    self.coords[i] = atoms[i][1:4]
216                else:
217                    print('Error: Coordinates of atoms should be floats/integers.')
218                    return False
219
220            else:
221                print('Error: Illegal definition of coords in atoms. There should be all three x,y,z coordinates.\nSome seem to be missing.')
222                return False
223        
224        if self.natoms==self.coords.shape[0]:
225            return True
226        else:
227            print('Error: Some definitions of atomic symbols/coords were illegal')

Validate atomic symbols and coordinates provided in the atoms list.

This method checks that atomic symbols are valid (either as strings matching known element symbols or as integers representing atomic numbers), and that coordinates are properly formatted as numerical values.

Args: atoms (list): List of atoms in format [[symbol, x, y, z], ...] where symbol can be str (element symbol) or int (atomic number), and x, y, z are numerical coordinates.

Returns: bool: True if all atoms are valid, False otherwise.

Side Effects: Updates the following instance attributes: - atomicSpecies: List of atomic symbols - Zcharges: List of atomic numbers - nelectrons: Total number of electrons - natoms: Number of atoms - coords: Numpy array of coordinates

Raises: Prints error messages for invalid atomic symbols, coordinates, or formatting.

def get_center_of_charge(self, units='angs'):
230    def get_center_of_charge(self, units='angs'):
231        """
232        Calculate the center of charge (weighted by atomic numbers) of the molecule.
233        
234        The center of charge is computed as the weighted average of atomic positions,
235        where the weights are the atomic numbers (nuclear charges) of each atom.
236        
237        Returns:
238            numpy.ndarray or None: 3D coordinates of the center of charge in Angstroms.
239                                  Returns None if the molecule was not successfully initialized.
240        
241        Formula:
242            center_of_charge = Σ(Z_i * r_i) / Σ(Z_i)
243            where Z_i is the atomic number and r_i is the position of atom i.
244        """
245        if not self.success:
246            print("Error: Mol object is not successfully initialized.")
247            return None
248
249        # Use NumPy for efficient calculations
250        total_charge = np.sum(self.Zcharges)
251        if units=='angs':
252            coc = np.dot(self.Zcharges, self.coords) / total_charge
253        else:
254            coc = np.dot(self.Zcharges, self.coordsBohrs) / total_charge
255
256        return coc
257        # # Use NumPy for efficient calculations
258        # total_mass = np.sum(self.Zcharges)
259        # com_x = np.sum(self.Zcharges * self.coords[:, 0]) / total_mass
260        # com_y = np.sum(self.Zcharges * self.coords[:, 1]) / total_mass
261        # com_z = np.sum(self.Zcharges * self.coords[:, 2]) / total_mass
262
263        # com = np.array([com_x, com_y, com_z])
264        # return com

Calculate the center of charge (weighted by atomic numbers) of the molecule.

The center of charge is computed as the weighted average of atomic positions, where the weights are the atomic numbers (nuclear charges) of each atom.

Returns: numpy.ndarray or None: 3D coordinates of the center of charge in Angstroms. Returns None if the molecule was not successfully initialized.

Formula: center_of_charge = Σ(Z_i * r_i) / Σ(Z_i) where Z_i is the atomic number and r_i is the position of atom i.

def get_nuc_dip_moment(self):
266    def get_nuc_dip_moment(self):
267        """
268        Calculate the nuclear contribution to the molecular dipole moment.
269        
270        The nuclear dipole moment is computed as the sum of nuclear charges
271        multiplied by their positions in atomic units (Bohr).
272        
273        Returns:
274            numpy.ndarray: 3D vector representing the nuclear dipole moment
275                          in atomic units (e⋅a₀), where each component corresponds
276                          to x, y, z directions.
277        
278        Formula:
279            μ_nuc = Σ(Z_i * r_i)
280            where Z_i is the nuclear charge and r_i is the position in Bohr units.
281        """
282        charges = self.Zcharges
283        coords  = self.coordsBohrs 
284        nuc_dip = np.einsum('i,ix->x', charges, coords)
285        return nuc_dip

Calculate the nuclear contribution to the molecular dipole moment.

The nuclear dipole moment is computed as the sum of nuclear charges multiplied by their positions in atomic units (Bohr).

Returns: numpy.ndarray: 3D vector representing the nuclear dipole moment in atomic units (e⋅a₀), where each component corresponds to x, y, z directions.

Formula: μ_nuc = Σ(Z_i * r_i) where Z_i is the nuclear charge and r_i is the position in Bohr units.

def get_elec_dip_moment(self, dipole_moment_matrix, dmat):
287    def get_elec_dip_moment(self, dipole_moment_matrix, dmat):
288        """
289        Calculate the electronic contribution to the molecular dipole moment.
290        
291        The electronic dipole moment is computed using the dipole moment integrals
292        and the density matrix from quantum chemical calculations.
293        
294        Args:
295            dipole_moment_matrix (numpy.ndarray): 3D array of dipole moment integrals
296                                                 with shape (3, nbasis, nbasis), where
297                                                 the first dimension corresponds to x, y, z.
298            dmat (numpy.ndarray): Density matrix with shape (nbasis, nbasis).
299        
300        Returns:
301            numpy.ndarray: 3D vector representing the electronic dipole moment
302                          in atomic units (e⋅a₀), where each component corresponds
303                          to x, y, z directions.
304        
305        Formula:
306            μ_elec = Σ_μν P_μν ⟨μ|r|ν⟩
307            where P_μν is the density matrix and ⟨μ|r|ν⟩ are dipole integrals.
308        """
309        elec_dip = np.einsum('xij,ji->x', dipole_moment_matrix, dmat).real
310        return elec_dip

Calculate the electronic contribution to the molecular dipole moment.

The electronic dipole moment is computed using the dipole moment integrals and the density matrix from quantum chemical calculations.

Args: dipole_moment_matrix (numpy.ndarray): 3D array of dipole moment integrals with shape (3, nbasis, nbasis), where the first dimension corresponds to x, y, z. dmat (numpy.ndarray): Density matrix with shape (nbasis, nbasis).

Returns: numpy.ndarray: 3D vector representing the electronic dipole moment in atomic units (e⋅a₀), where each component corresponds to x, y, z directions.

Formula: μ_elec = Σ_μν P_μν ⟨μ|r|ν⟩ where P_μν is the density matrix and ⟨μ|r|ν⟩ are dipole integrals.

def get_dipole_moment(self, dipole_moment_matrix, dmat):
312    def get_dipole_moment(self, dipole_moment_matrix, dmat):
313        """
314        Calculate the total molecular dipole moment.
315        
316        The total dipole moment is computed as the difference between nuclear
317        and electronic contributions: μ_total = μ_nuclear - μ_electronic
318        
319        Args:
320            dipole_moment_matrix (numpy.ndarray): 3D array of dipole moment integrals
321                                                 with shape (3, nbasis, nbasis).
322            dmat (numpy.ndarray): Density matrix from quantum chemical calculation
323                                 with shape (nbasis, nbasis).
324        
325        Returns:
326            numpy.ndarray: 3D vector representing the total molecular dipole moment
327                          in atomic units (e⋅a₀). Each component corresponds to
328                          x, y, z directions.
329        
330        Note:
331            The sign convention follows: μ_total = μ_nuclear - μ_electronic
332            This gives the dipole moment vector pointing from negative to positive charge.
333            
334        TODO: Allow user to specify output units (e.g., Debye).
335        """
336        nuc_dip = self.get_nuc_dip_moment()
337        elec_dip = self.get_elec_dip_moment(dipole_moment_matrix, dmat)
338        mol_dip = nuc_dip - elec_dip
339        # TODO: allow the user to specify units (ex: DEBYE)
340        return mol_dip

Calculate the total molecular dipole moment.

The total dipole moment is computed as the difference between nuclear and electronic contributions: μ_total = μ_nuclear - μ_electronic

Args: dipole_moment_matrix (numpy.ndarray): 3D array of dipole moment integrals with shape (3, nbasis, nbasis). dmat (numpy.ndarray): Density matrix from quantum chemical calculation with shape (nbasis, nbasis).

Returns: numpy.ndarray: 3D vector representing the total molecular dipole moment in atomic units (e⋅a₀). Each component corresponds to x, y, z directions.

Note: The sign convention follows: μ_total = μ_nuclear - μ_electronic This gives the dipole moment vector pointing from negative to positive charge.

TODO: Allow user to specify output units (e.g., Debye).

def exportCoords(atomicSpecies, coords, filename='coord'):
343    def exportCoords(atomicSpecies, coords, filename='coord'):
344        """
345        Export molecular coordinates to TURBOMOLE format.
346        
347        Creates a coordinate file in TURBOMOLE format with atomic positions
348        converted from Angstroms to Bohr units.
349        
350        Args:
351            atomicSpecies (list): List of atomic symbols as strings.
352            coords (numpy.ndarray): Atomic coordinates in Angstroms with shape (natoms, 3).
353            filename (str, optional): Output filename. Defaults to 'coord'.
354        
355        File Format:
356            $coord
357            x_bohr y_bohr z_bohr element_symbol
358            ...
359            $end
360        
361        Note:
362            This is a static method that should be called as a class method.
363            Coordinates are automatically converted from Angstroms to Bohr units.
364        
365        Warning:
366            This method contains a bug - it references undefined variable 'coord'
367            instead of 'coords'. Should be fixed in implementation.
368        """
369        file = open(filename,'w')
370        file.write('$coord\n')
371        for i in range(len(atomicSpecies)):
372            file.write(str(coord[i,0]*Data.Angs2BohrFactor)+'\t'+str(coord[i,1]*Data.Angs2BohrFactor)+'\t'+str(coord[i,2]*Data.Angs2BohrFactor)+'\t'+atomicSpecies[i].lower()+'\n')
373        file.write('$end\n')
374        file.close()

Export molecular coordinates to TURBOMOLE format.

Creates a coordinate file in TURBOMOLE format with atomic positions converted from Angstroms to Bohr units.

Args: atomicSpecies (list): List of atomic symbols as strings. coords (numpy.ndarray): Atomic coordinates in Angstroms with shape (natoms, 3). filename (str, optional): Output filename. Defaults to 'coord'.

File Format: $coord x_bohr y_bohr z_bohr element_symbol ... $end

Note: This is a static method that should be called as a class method. Coordinates are automatically converted from Angstroms to Bohr units.

Warning: This method contains a bug - it references undefined variable 'coord' instead of 'coords'. Should be fixed in implementation.

def exportXYZ( self, filename='coord', label='Generic Mol generated by CrysX (Python Library)'):
377    def exportXYZ(self, filename='coord', label='Generic Mol generated by CrysX (Python Library)'):
378        """
379        Export molecular geometry to XYZ format file.
380        
381        Creates a standard XYZ format file with atomic coordinates in Angstroms.
382        This method uses the molecule's current atomic species and coordinates.
383        
384        Args:
385            filename (str, optional): Base filename without extension. Defaults to 'coord'.
386                                     The '.xyz' extension will be automatically added.
387            label (str, optional): Comment line for the XYZ file. Defaults to 
388                                  'Generic Mol generated by CrysX (Python Library)'.
389        
390        File Format:
391            number_of_atoms
392            comment_line
393            element_symbol x_coord y_coord z_coord
394            ...
395        
396        Output:
397            Creates a file named '{filename}.xyz' in the current directory.
398        
399        Example:
400            mol.exportXYZ('water', 'H2O molecule')
401            # Creates 'water.xyz' with H2O coordinates
402        """
403        file = open(filename+'.xyz','w')
404        file.write(str(self.natoms)+'\n')
405        file.write(label+'\n')
406        for i in range(self.natoms):
407            file.write(self.atomicSpecies[i]+'\t'+str(self.coords[i,0])+'\t'+str(self.coords[i,1])+'\t'+str(self.coords[i,2])+'\n')
408        file.close()   

Export molecular geometry to XYZ format file.

Creates a standard XYZ format file with atomic coordinates in Angstroms. This method uses the molecule's current atomic species and coordinates.

Args: filename (str, optional): Base filename without extension. Defaults to 'coord'. The '.xyz' extension will be automatically added. label (str, optional): Comment line for the XYZ file. Defaults to 'Generic Mol generated by CrysX (Python Library)'.

File Format: number_of_atoms comment_line element_symbol x_coord y_coord z_coord ...

Output: Creates a file named '{filename}.xyz' in the current directory.

Example: mol.exportXYZ('water', 'H2O molecule') # Creates 'water.xyz' with H2O coordinates

def exportXYZs( atomicSpecies, coords, filename='coord', label='Generic Mol generated by CrysX (Python Library)'):
410    def exportXYZs(atomicSpecies, coords, filename='coord', label='Generic Mol generated by CrysX (Python Library)'):
411        """
412        Export molecular geometry to XYZ format (static method version).
413        
414        Static method to create XYZ files from provided atomic species and coordinates,
415        without requiring a Mol object instance.
416        
417        Args:
418            atomicSpecies (list): List of atomic symbols as strings.
419            coords (numpy.ndarray): Atomic coordinates in Angstroms with shape (natoms, 3).
420            filename (str, optional): Base filename without extension. Defaults to 'coord'.
421                                     The '.xyz' extension will be automatically added.
422            label (str, optional): Comment line for the XYZ file. Defaults to 
423                                  'Generic Mol generated by CrysX (Python Library)'.
424        
425        File Format:
426            number_of_atoms
427            comment_line
428            element_symbol x_coord y_coord z_coord
429            ...
430        
431        Warning:
432            This method contains a bug - it references undefined variable 'coord'
433            instead of 'coords'. Should be fixed in implementation.
434        
435        Note:
436            This is a static method and should be called as a class method.
437        """
438        file = open(filename+'.xyz','w')
439        file.write(str(len(atomicSpecies))+'\n')
440        file.write(label+'\n')
441        for i in range(len(atomicSpecies)):
442            file.write(atomicSpecies[i]+'\t'+str(coord[i,0])+'\t'+str(coord[i,1])+'\t'+str(coord[i,2])+'\n')
443        file.close()

Export molecular geometry to XYZ format (static method version).

Static method to create XYZ files from provided atomic species and coordinates, without requiring a Mol object instance.

Args: atomicSpecies (list): List of atomic symbols as strings. coords (numpy.ndarray): Atomic coordinates in Angstroms with shape (natoms, 3). filename (str, optional): Base filename without extension. Defaults to 'coord'. The '.xyz' extension will be automatically added. label (str, optional): Comment line for the XYZ file. Defaults to 'Generic Mol generated by CrysX (Python Library)'.

File Format: number_of_atoms comment_line element_symbol x_coord y_coord z_coord ...

Warning: This method contains a bug - it references undefined variable 'coord' instead of 'coords'. Should be fixed in implementation.

Note: This is a static method and should be called as a class method.

def readCoordsFile(self, filename):
446    def readCoordsFile(self, filename):
447        """
448        Read molecular coordinates from various file formats.
449        
450        Currently supports XYZ format files. This method serves as a dispatcher
451        to format-specific reading methods based on file extension.
452        
453        Args:
454            filename (str): Path to the coordinate file. File format is determined
455                           by the file extension.
456        
457        Returns:
458            list: List of atoms in the format [[symbol, x, y, z], ...] where
459                  symbol is the atomic symbol and x, y, z are coordinates in Angstroms.
460        
461        Supported Formats:
462            - .xyz: Standard XYZ coordinate format
463        
464        Future Extensions:
465            - .mol: MOL file format
466            - coord: TURBOMOLE coordinate format
467            - Other quantum chemistry input formats
468        
469        Raises:
470            Prints error message if file format is not supported.
471        """
472        if filename.endswith('.xyz'):
473            atoms = self.readXYZ(filename)
474        else:
475            print('Only xyz files for now') 
476        return atoms

Read molecular coordinates from various file formats.

Currently supports XYZ format files. This method serves as a dispatcher to format-specific reading methods based on file extension.

Args: filename (str): Path to the coordinate file. File format is determined by the file extension.

Returns: list: List of atoms in the format [[symbol, x, y, z], ...] where symbol is the atomic symbol and x, y, z are coordinates in Angstroms.

Supported Formats: - .xyz: Standard XYZ coordinate format

Future Extensions: - .mol: MOL file format - coord: TURBOMOLE coordinate format - Other quantum chemistry input formats

Raises: Prints error message if file format is not supported.

def readXYZ(self, filename):
478    def readXYZ(self, filename):
479        """
480        Read molecular geometry from an XYZ format file.
481        
482        Parses a standard XYZ file and extracts atomic symbols and coordinates.
483        The XYZ format consists of:
484        - Line 1: Number of atoms
485        - Line 2: Comment line (ignored)
486        - Lines 3+: atomic_symbol x_coordinate y_coordinate z_coordinate
487        
488        Args:
489            filename (str): Path to the XYZ file to read.
490        
491        Returns:
492            list: List of atoms in format [[symbol, x, y, z], ...] where:
493                  - symbol (str): Atomic symbol (e.g., 'H', 'C', 'O')
494                  - x, y, z (float): Coordinates in Angstroms
495        
496        File Format Expected:
497            number_of_atoms
498            comment_line
499            element_symbol x_coord y_coord z_coord
500            element_symbol x_coord y_coord z_coord
501            ...
502        
503        Error Handling:
504            - Prints error if the number of atoms read doesn't match the declared count
505            - Assumes coordinates are separated by whitespace
506            - Strips whitespace from all input
507        
508        Example:
509            For a file 'water.xyz':
510            3
511            Water molecule
512            O    0.000000    0.000000    0.000000
513            H    0.000000    0.000000    0.970000
514            H    0.944863    0.000000   -0.242498
515            
516            Returns: [['O', 0.0, 0.0, 0.0], ['H', 0.0, 0.0, 0.97], ['H', 0.944863, 0.0, -0.242498]]
517        """
518        with open(filename, "r") as f:
519            lines = f.readlines()
520
521        if len(lines) < 2:
522            raise ValueError(f"File {filename} does not contain enough lines for XYZ format")
523
524        # Read number of atoms from first line
525        try:
526            natoms = int(lines[0].strip())
527        except ValueError:
528            raise ValueError(f"First line of {filename} must contain an integer atom count")
529
530        # The second line is the comment (can be blank)
531        comment_line = lines[1].rstrip("\n")
532
533        atoms = []
534        for i, line in enumerate(lines[2:2 + natoms], start=3):
535            parts = line.split()
536            if len(parts) < 4:
537                raise ValueError(f"Line {i} in {filename} does not have 4 columns: {line.strip()}")
538            symbol = parts[0]
539            try:
540                x, y, z = map(float, parts[1:4])
541            except ValueError:
542                raise ValueError(f"Invalid coordinates on line {i} in {filename}: {parts[1:4]}")
543            atoms.append([symbol, x, y, z])
544
545        if len(atoms) != natoms:
546            raise ValueError(
547                f"Expected {natoms} atoms, but found {len(atoms)} in {filename}"
548            )
549
550        return atoms

Read molecular geometry from an XYZ format file.

Parses a standard XYZ file and extracts atomic symbols and coordinates. The XYZ format consists of:

  • Line 1: Number of atoms
  • Line 2: Comment line (ignored)
  • Lines 3+: atomic_symbol x_coordinate y_coordinate z_coordinate

Args: filename (str): Path to the XYZ file to read.

Returns: list: List of atoms in format [[symbol, x, y, z], ...] where: - symbol (str): Atomic symbol (e.g., 'H', 'C', 'O') - x, y, z (float): Coordinates in Angstroms

File Format Expected: number_of_atoms comment_line element_symbol x_coord y_coord z_coord element_symbol x_coord y_coord z_coord ...

Error Handling: - Prints error if the number of atoms read doesn't match the declared count - Assumes coordinates are separated by whitespace - Strips whitespace from all input

Example: For a file 'water.xyz': 3 Water molecule O 0.000000 0.000000 0.000000 H 0.000000 0.000000 0.970000 H 0.944863 0.000000 -0.242498

Returns: [['O', 0.0, 0.0, 0.0], ['H', 0.0, 0.0, 0.97], ['H', 0.944863, 0.0, -0.242498]]