pyfock.Mol
1# Mol.py 2# Author: Manas Sharma ([email protected]) 3# This is a part of CrysX (https://bragitoff.com/crysx) 4# 5# 6# .d8888b. Y88b d88P 8888888b. 8888888888 888 7# d88P Y88b Y88b d88P 888 Y88b 888 888 8# 888 888 Y88o88P 888 888 888 888 9# 888 888d888 888 888 .d8888b Y888P 888 d88P 888 888 8888888 .d88b. .d8888b 888 888 10# 888 888P" 888 888 88K d888b 8888888P" 888 888 888 d88""88b d88P" 888 .88P 11# 888 888 888 888 888 "Y8888b. d88888b 888888 888 888 888 888 888 888 888 888888K 12# Y88b d88P 888 Y88b 888 X88 d88P Y88b 888 Y88b 888 888 Y88..88P Y88b. 888 "88b 13# "Y8888P" 888 "Y88888 88888P' d88P Y88b 888 "Y88888 888 "Y88P" "Y8888P 888 888 14# 888 888 15# Y8b d88P Y8b d88P 16# "Y88P" "Y88P" 17from . import Data 18from . import Basis 19import numpy as np 20#Class to store molecular properties 21class Mol: 22 """ 23 Class to store molecular properties and handle molecular structure operations. 24 25 This class provides functionality for creating, manipulating, and exporting molecular 26 structures. It supports reading from various coordinate file formats and automatically 27 generates basis set information. 28 29 Attributes: 30 coordfile (str): Path to coordinate file 31 atoms (list): List of atoms with coordinates 32 charge (int): Molecular charge 33 basis (Basis): Basis set object for the molecule 34 nelectrons (int): Number of electrons 35 natoms (int): Number of atoms 36 coords (numpy.ndarray): Atomic coordinates in Angstroms 37 coordsBohrs (numpy.ndarray): Atomic coordinates in Bohr units 38 atomicSpecies (list): List of atomic symbols 39 Zcharges (list): List of atomic numbers 40 success (bool): Whether molecule was initialized successfully 41 label (str): Molecular label/description 42 """ 43 def __init__(self, atoms=None, coordfile=None, charge=0, basis=None): 44 """ 45 Initialize a Mol object with atomic coordinates and properties. 46 47 Args: 48 atoms (list, optional): List of atoms in the format [[symbol, x, y, z], ...] 49 where symbol can be atomic symbol (str) or atomic number (int), 50 and x, y, z are coordinates in Angstroms. 51 coordfile (str, optional): Path to coordinate file (.xyz format supported). 52 charge (int, optional): Molecular charge. Defaults to 0. 53 basis (dict, optional): Basis set specification. If None, defaults to STO-3G 54 for all atoms. 55 56 Raises: 57 ValueError: If neither atoms nor coordfile is provided. 58 TypeError: If coordfile is not a string. 59 60 Note: 61 If both atoms and coordfile are provided, atoms takes precedence and 62 coordfile is ignored. 63 """ 64 #If no atoms or coordfile is specified. 65 if atoms is None and coordfile is None: 66 print('Error: No atoms and their coords specified.\nPlease either specify .xyz/.mol/coord file or enter atoms manually.') 67 print('See manual for supported file formats.') 68 #If both atoms and coordfile are specified then atoms are considered and the coord file is not considered. 69 elif not coordfile is None and not atoms is None: 70 print('Remark: User provided both atoms as well as the coordfile.\nOnly atoms are being used.') 71 elif coordfile is not None and atoms is None: 72 if isinstance(coordfile, str): 73 atoms = self.readCoordsFile(coordfile) 74 else: 75 print('Error: The coordfile argument should be a string with the name of the coord file.') 76 77 78 #Coord file .xyz/.mol/TURBOMOLE coord file/QE input file/Orca input file 79 self.coordfile = coordfile 80 """str or None: Path to the coordinate file used to initialize the molecule. 81 Contains the filename of the original coordinate file (.xyz, .mol, TURBOMOLE coord, etc.) 82 if the molecule was created from a file, or None if created directly from atoms list.""" 83 #Atoms and their coordinates 84 self.atoms = atoms 85 """list: List of atoms with their coordinates in the format [[symbol, x, y, z], ...] where: 86 - symbol: Atomic symbol (str) or atomic number (int) 87 - x, y, z: Cartesian coordinates in Angstroms (float) 88 This is the raw input data used to construct the molecule.""" 89 #Molecular charge 90 self.charge = charge 91 """int: Net charge of the molecule in elementary charge units. 92 Positive values indicate cationic species, negative values indicate anionic species, 93 and 0 indicates a neutral molecule. Used to calculate the total number of electrons.""" 94 #The basis object for the Mol object 95 self.basis = None 96 """Basis or None: Basis set object containing all basis function information for quantum 97 chemical calculations. Initialized as None and populated with a Basis instance that 98 handles basis set assignments for each atom in the molecule.""" 99 100 self.nelectrons = 0 101 """int: Total number of electrons in the molecule. 102 Calculated as the sum of all atomic numbers (nuclear charges) adjusted by the molecular charge: 103 nelectrons = Σ(Z_i) - charge, where Z_i is the atomic number of atom i.""" 104 self.natoms = 0 105 """int: Total number of atoms in the molecule. 106 Incremented during atom validation and used for array dimensioning and iteration 107 over atomic properties.""" 108 self.coords = [] 109 """numpy.ndarray: Atomic coordinates in Angstroms with shape (natoms, 3). 110 Each row contains the [x, y, z] coordinates of one atom. Initialized as empty list 111 and populated during atom validation to become a numpy array.""" 112 self.coordsBohrs = [] 113 """numpy.ndarray: Atomic coordinates converted to Bohr units with shape (natoms, 3). 114 Calculated from self.coords using the conversion factor Data.Angs2BohrFactor. 115 Used for quantum chemical calculations requiring atomic units of length.""" 116 self.atomicSpecies = [] 117 """list of str: List of atomic symbols as strings (e.g., ['H', 'C', 'N', 'O']). 118 Ordered the same as atoms appear in the molecule. Length equals natoms. 119 Used for identifying atom types and output formatting.""" 120 121 self.Zcharges = [] 122 """list of int: List of atomic numbers (nuclear charges) for each atom in the molecule. 123 Corresponds to the number of protons in each atomic nucleus. Used for calculating 124 electronic properties and nuclear contributions to various molecular properties.""" 125 self.success = False 126 """bool: Flag indicating whether the molecule was successfully initialized and validated. 127 Set to True if all atoms have valid symbols/atomic numbers and properly formatted 128 coordinates, False otherwise. Used to prevent operations on invalid molecular data.""" 129 130 self.label = 'Generic mol' 131 """str: Descriptive label or name for the molecule. 132 Defaults to 'Generic mol' but can be customized. Used in output files and for 133 identification purposes when exporting molecular data to various file formats.""" 134 #Validate if the atoms attribute are provided correctly 135 self.success = self.validateAtoms(atoms) 136 if self.success: 137 self.nelectrons = self.nelectrons+charge 138 self.coordsBohrs = self.coords*Data.Angs2BohrFactor 139 #Basis set names for the atoms. 140 #Either just one basis function can be specified for all atoms 141 #Or one can specify a particular basis set for a particular atom. 142 #'basis' is a dictionary specifying the basis set to be used for a particular atom 143 if basis is None: 144 #Default basis set is sto-3g for all atoms 145 self.basis = {'all':Basis.load(mol=self, basis_name='sto-3g')}#{'all','sto-3g'} 146 self.basis = Basis(self, self.basis) 147 else: 148 self.basis = Basis(self, basis) 149 150 151 152 153 154 def validateAtoms(self, atoms): 155 """ 156 Validate atomic symbols and coordinates provided in the atoms list. 157 158 This method checks that atomic symbols are valid (either as strings matching 159 known element symbols or as integers representing atomic numbers), and that 160 coordinates are properly formatted as numerical values. 161 162 Args: 163 atoms (list): List of atoms in format [[symbol, x, y, z], ...] where 164 symbol can be str (element symbol) or int (atomic number), 165 and x, y, z are numerical coordinates. 166 167 Returns: 168 bool: True if all atoms are valid, False otherwise. 169 170 Side Effects: 171 Updates the following instance attributes: 172 - atomicSpecies: List of atomic symbols 173 - Zcharges: List of atomic numbers 174 - nelectrons: Total number of electrons 175 - natoms: Number of atoms 176 - coords: Numpy array of coordinates 177 178 Raises: 179 Prints error messages for invalid atomic symbols, coordinates, or formatting. 180 """ 181 #Validate atomic symbols 182 for i in range(len(atoms)): 183 if isinstance(atoms[i][0], str): 184 elementSymbols = [x.lower() for x in Data.elementSymbols] 185 if atoms[i][0].lower() in elementSymbols: 186 self.atomicSpecies.append(atoms[i][0]) 187 Z = elementSymbols.index(atoms[i][0].lower()) 188 self.Zcharges.append(Z) 189 self.nelectrons = self.nelectrons + Z 190 self.natoms = self.natoms + 1 191 else: 192 print('Error: Unknown atomic symbols found') 193 return False 194 elif isinstance(atoms[i][0], int): 195 if atoms[i][0]<=118 and atoms[i][0]>=0: 196 self.atomicSpecies.append(Data.elementSymbols[atoms[i][0]]) 197 Z = atoms[i][0] 198 self.Zcharges.append(Z) 199 self.nelectrons = self.nelectrons + Z 200 self.natoms = self.natoms + 1 201 else: 202 print('Error: Found atomic number out of the valid range') 203 return False 204 else: 205 print('Error: Found something other than string or integer for atomic symbols/numbers') 206 return False 207 self.coords = np.zeros([self.natoms,3]) 208 #Validate atomic coords 209 for i in range(len(atoms)): 210 if len(atoms[i])==4: 211 #TODO: the following condition doesn't work 212 #FIXME 213 if ( isinstance(coordi, float) for coordi in atoms[i][1:4] ) or ( isinstance(coordi, int) for coordi in atoms[i][1:4]): 214 self.coords[i] = atoms[i][1:4] 215 else: 216 print('Error: Coordinates of atoms should be floats/integers.') 217 return False 218 219 else: 220 print('Error: Illegal definition of coords in atoms. There should be all three x,y,z coordinates.\nSome seem to be missing.') 221 return False 222 223 if self.natoms==self.coords.shape[0]: 224 return True 225 else: 226 print('Error: Some definitions of atomic symbols/coords were illegal') 227 228 229 def get_center_of_charge(self, units='angs'): 230 """ 231 Calculate the center of charge (weighted by atomic numbers) of the molecule. 232 233 The center of charge is computed as the weighted average of atomic positions, 234 where the weights are the atomic numbers (nuclear charges) of each atom. 235 236 Returns: 237 numpy.ndarray or None: 3D coordinates of the center of charge in Angstroms. 238 Returns None if the molecule was not successfully initialized. 239 240 Formula: 241 center_of_charge = Σ(Z_i * r_i) / Σ(Z_i) 242 where Z_i is the atomic number and r_i is the position of atom i. 243 """ 244 if not self.success: 245 print("Error: Mol object is not successfully initialized.") 246 return None 247 248 # Use NumPy for efficient calculations 249 total_charge = np.sum(self.Zcharges) 250 if units=='angs': 251 coc = np.dot(self.Zcharges, self.coords) / total_charge 252 else: 253 coc = np.dot(self.Zcharges, self.coordsBohrs) / total_charge 254 255 return coc 256 # # Use NumPy for efficient calculations 257 # total_mass = np.sum(self.Zcharges) 258 # com_x = np.sum(self.Zcharges * self.coords[:, 0]) / total_mass 259 # com_y = np.sum(self.Zcharges * self.coords[:, 1]) / total_mass 260 # com_z = np.sum(self.Zcharges * self.coords[:, 2]) / total_mass 261 262 # com = np.array([com_x, com_y, com_z]) 263 # return com 264 265 def get_nuc_dip_moment(self): 266 """ 267 Calculate the nuclear contribution to the molecular dipole moment. 268 269 The nuclear dipole moment is computed as the sum of nuclear charges 270 multiplied by their positions in atomic units (Bohr). 271 272 Returns: 273 numpy.ndarray: 3D vector representing the nuclear dipole moment 274 in atomic units (e⋅a₀), where each component corresponds 275 to x, y, z directions. 276 277 Formula: 278 μ_nuc = Σ(Z_i * r_i) 279 where Z_i is the nuclear charge and r_i is the position in Bohr units. 280 """ 281 charges = self.Zcharges 282 coords = self.coordsBohrs 283 nuc_dip = np.einsum('i,ix->x', charges, coords) 284 return nuc_dip 285 286 def get_elec_dip_moment(self, dipole_moment_matrix, dmat): 287 """ 288 Calculate the electronic contribution to the molecular dipole moment. 289 290 The electronic dipole moment is computed using the dipole moment integrals 291 and the density matrix from quantum chemical calculations. 292 293 Args: 294 dipole_moment_matrix (numpy.ndarray): 3D array of dipole moment integrals 295 with shape (3, nbasis, nbasis), where 296 the first dimension corresponds to x, y, z. 297 dmat (numpy.ndarray): Density matrix with shape (nbasis, nbasis). 298 299 Returns: 300 numpy.ndarray: 3D vector representing the electronic dipole moment 301 in atomic units (e⋅a₀), where each component corresponds 302 to x, y, z directions. 303 304 Formula: 305 μ_elec = Σ_μν P_μν ⟨μ|r|ν⟩ 306 where P_μν is the density matrix and ⟨μ|r|ν⟩ are dipole integrals. 307 """ 308 elec_dip = np.einsum('xij,ji->x', dipole_moment_matrix, dmat).real 309 return elec_dip 310 311 def get_dipole_moment(self, dipole_moment_matrix, dmat): 312 """ 313 Calculate the total molecular dipole moment. 314 315 The total dipole moment is computed as the difference between nuclear 316 and electronic contributions: μ_total = μ_nuclear - μ_electronic 317 318 Args: 319 dipole_moment_matrix (numpy.ndarray): 3D array of dipole moment integrals 320 with shape (3, nbasis, nbasis). 321 dmat (numpy.ndarray): Density matrix from quantum chemical calculation 322 with shape (nbasis, nbasis). 323 324 Returns: 325 numpy.ndarray: 3D vector representing the total molecular dipole moment 326 in atomic units (e⋅a₀). Each component corresponds to 327 x, y, z directions. 328 329 Note: 330 The sign convention follows: μ_total = μ_nuclear - μ_electronic 331 This gives the dipole moment vector pointing from negative to positive charge. 332 333 TODO: Allow user to specify output units (e.g., Debye). 334 """ 335 nuc_dip = self.get_nuc_dip_moment() 336 elec_dip = self.get_elec_dip_moment(dipole_moment_matrix, dmat) 337 mol_dip = nuc_dip - elec_dip 338 # TODO: allow the user to specify units (ex: DEBYE) 339 return mol_dip 340 341 #Export coord file in TURBOMOLE format 342 def exportCoords(atomicSpecies, coords, filename='coord'): 343 """ 344 Export molecular coordinates to TURBOMOLE format. 345 346 Creates a coordinate file in TURBOMOLE format with atomic positions 347 converted from Angstroms to Bohr units. 348 349 Args: 350 atomicSpecies (list): List of atomic symbols as strings. 351 coords (numpy.ndarray): Atomic coordinates in Angstroms with shape (natoms, 3). 352 filename (str, optional): Output filename. Defaults to 'coord'. 353 354 File Format: 355 $coord 356 x_bohr y_bohr z_bohr element_symbol 357 ... 358 $end 359 360 Note: 361 This is a static method that should be called as a class method. 362 Coordinates are automatically converted from Angstroms to Bohr units. 363 364 Warning: 365 This method contains a bug - it references undefined variable 'coord' 366 instead of 'coords'. Should be fixed in implementation. 367 """ 368 file = open(filename,'w') 369 file.write('$coord\n') 370 for i in range(len(atomicSpecies)): 371 file.write(str(coord[i,0]*Data.Angs2BohrFactor)+'\t'+str(coord[i,1]*Data.Angs2BohrFactor)+'\t'+str(coord[i,2]*Data.Angs2BohrFactor)+'\t'+atomicSpecies[i].lower()+'\n') 372 file.write('$end\n') 373 file.close() 374 375 #Export the geometry in XYZ format 376 def exportXYZ(self, filename='coord', label='Generic Mol generated by CrysX (Python Library)'): 377 """ 378 Export molecular geometry to XYZ format file. 379 380 Creates a standard XYZ format file with atomic coordinates in Angstroms. 381 This method uses the molecule's current atomic species and coordinates. 382 383 Args: 384 filename (str, optional): Base filename without extension. Defaults to 'coord'. 385 The '.xyz' extension will be automatically added. 386 label (str, optional): Comment line for the XYZ file. Defaults to 387 'Generic Mol generated by CrysX (Python Library)'. 388 389 File Format: 390 number_of_atoms 391 comment_line 392 element_symbol x_coord y_coord z_coord 393 ... 394 395 Output: 396 Creates a file named '{filename}.xyz' in the current directory. 397 398 Example: 399 mol.exportXYZ('water', 'H2O molecule') 400 # Creates 'water.xyz' with H2O coordinates 401 """ 402 file = open(filename+'.xyz','w') 403 file.write(str(self.natoms)+'\n') 404 file.write(label+'\n') 405 for i in range(self.natoms): 406 file.write(self.atomicSpecies[i]+'\t'+str(self.coords[i,0])+'\t'+str(self.coords[i,1])+'\t'+str(self.coords[i,2])+'\n') 407 file.close() 408 #Export the geometry in XYZ format 409 def exportXYZs(atomicSpecies, coords, filename='coord', label='Generic Mol generated by CrysX (Python Library)'): 410 """ 411 Export molecular geometry to XYZ format (static method version). 412 413 Static method to create XYZ files from provided atomic species and coordinates, 414 without requiring a Mol object instance. 415 416 Args: 417 atomicSpecies (list): List of atomic symbols as strings. 418 coords (numpy.ndarray): Atomic coordinates in Angstroms with shape (natoms, 3). 419 filename (str, optional): Base filename without extension. Defaults to 'coord'. 420 The '.xyz' extension will be automatically added. 421 label (str, optional): Comment line for the XYZ file. Defaults to 422 'Generic Mol generated by CrysX (Python Library)'. 423 424 File Format: 425 number_of_atoms 426 comment_line 427 element_symbol x_coord y_coord z_coord 428 ... 429 430 Warning: 431 This method contains a bug - it references undefined variable 'coord' 432 instead of 'coords'. Should be fixed in implementation. 433 434 Note: 435 This is a static method and should be called as a class method. 436 """ 437 file = open(filename+'.xyz','w') 438 file.write(str(len(atomicSpecies))+'\n') 439 file.write(label+'\n') 440 for i in range(len(atomicSpecies)): 441 file.write(atomicSpecies[i]+'\t'+str(coord[i,0])+'\t'+str(coord[i,1])+'\t'+str(coord[i,2])+'\n') 442 file.close() 443 444 # Read geometry from TURBOOLE coords file 445 def readCoordsFile(self, filename): 446 """ 447 Read molecular coordinates from various file formats. 448 449 Currently supports XYZ format files. This method serves as a dispatcher 450 to format-specific reading methods based on file extension. 451 452 Args: 453 filename (str): Path to the coordinate file. File format is determined 454 by the file extension. 455 456 Returns: 457 list: List of atoms in the format [[symbol, x, y, z], ...] where 458 symbol is the atomic symbol and x, y, z are coordinates in Angstroms. 459 460 Supported Formats: 461 - .xyz: Standard XYZ coordinate format 462 463 Future Extensions: 464 - .mol: MOL file format 465 - coord: TURBOMOLE coordinate format 466 - Other quantum chemistry input formats 467 468 Raises: 469 Prints error message if file format is not supported. 470 """ 471 if filename.endswith('.xyz'): 472 atoms = self.readXYZ(filename) 473 else: 474 print('Only xyz files for now') 475 return atoms 476 #Read geometry information from a .xyz file 477 def readXYZ(self, filename): 478 """ 479 Read molecular geometry from an XYZ format file. 480 481 Parses a standard XYZ file and extracts atomic symbols and coordinates. 482 The XYZ format consists of: 483 - Line 1: Number of atoms 484 - Line 2: Comment line (ignored) 485 - Lines 3+: atomic_symbol x_coordinate y_coordinate z_coordinate 486 487 Args: 488 filename (str): Path to the XYZ file to read. 489 490 Returns: 491 list: List of atoms in format [[symbol, x, y, z], ...] where: 492 - symbol (str): Atomic symbol (e.g., 'H', 'C', 'O') 493 - x, y, z (float): Coordinates in Angstroms 494 495 File Format Expected: 496 number_of_atoms 497 comment_line 498 element_symbol x_coord y_coord z_coord 499 element_symbol x_coord y_coord z_coord 500 ... 501 502 Error Handling: 503 - Prints error if the number of atoms read doesn't match the declared count 504 - Assumes coordinates are separated by whitespace 505 - Strips whitespace from all input 506 507 Example: 508 For a file 'water.xyz': 509 3 510 Water molecule 511 O 0.000000 0.000000 0.000000 512 H 0.000000 0.000000 0.970000 513 H 0.944863 0.000000 -0.242498 514 515 Returns: [['O', 0.0, 0.0, 0.0], ['H', 0.0, 0.0, 0.97], ['H', 0.944863, 0.0, -0.242498]] 516 """ 517 with open(filename, "r") as f: 518 lines = f.readlines() 519 520 if len(lines) < 2: 521 raise ValueError(f"File {filename} does not contain enough lines for XYZ format") 522 523 # Read number of atoms from first line 524 try: 525 natoms = int(lines[0].strip()) 526 except ValueError: 527 raise ValueError(f"First line of {filename} must contain an integer atom count") 528 529 # The second line is the comment (can be blank) 530 comment_line = lines[1].rstrip("\n") 531 532 atoms = [] 533 for i, line in enumerate(lines[2:2 + natoms], start=3): 534 parts = line.split() 535 if len(parts) < 4: 536 raise ValueError(f"Line {i} in {filename} does not have 4 columns: {line.strip()}") 537 symbol = parts[0] 538 try: 539 x, y, z = map(float, parts[1:4]) 540 except ValueError: 541 raise ValueError(f"Invalid coordinates on line {i} in {filename}: {parts[1:4]}") 542 atoms.append([symbol, x, y, z]) 543 544 if len(atoms) != natoms: 545 raise ValueError( 546 f"Expected {natoms} atoms, but found {len(atoms)} in {filename}" 547 ) 548 549 return atoms
22class Mol: 23 """ 24 Class to store molecular properties and handle molecular structure operations. 25 26 This class provides functionality for creating, manipulating, and exporting molecular 27 structures. It supports reading from various coordinate file formats and automatically 28 generates basis set information. 29 30 Attributes: 31 coordfile (str): Path to coordinate file 32 atoms (list): List of atoms with coordinates 33 charge (int): Molecular charge 34 basis (Basis): Basis set object for the molecule 35 nelectrons (int): Number of electrons 36 natoms (int): Number of atoms 37 coords (numpy.ndarray): Atomic coordinates in Angstroms 38 coordsBohrs (numpy.ndarray): Atomic coordinates in Bohr units 39 atomicSpecies (list): List of atomic symbols 40 Zcharges (list): List of atomic numbers 41 success (bool): Whether molecule was initialized successfully 42 label (str): Molecular label/description 43 """ 44 def __init__(self, atoms=None, coordfile=None, charge=0, basis=None): 45 """ 46 Initialize a Mol object with atomic coordinates and properties. 47 48 Args: 49 atoms (list, optional): List of atoms in the format [[symbol, x, y, z], ...] 50 where symbol can be atomic symbol (str) or atomic number (int), 51 and x, y, z are coordinates in Angstroms. 52 coordfile (str, optional): Path to coordinate file (.xyz format supported). 53 charge (int, optional): Molecular charge. Defaults to 0. 54 basis (dict, optional): Basis set specification. If None, defaults to STO-3G 55 for all atoms. 56 57 Raises: 58 ValueError: If neither atoms nor coordfile is provided. 59 TypeError: If coordfile is not a string. 60 61 Note: 62 If both atoms and coordfile are provided, atoms takes precedence and 63 coordfile is ignored. 64 """ 65 #If no atoms or coordfile is specified. 66 if atoms is None and coordfile is None: 67 print('Error: No atoms and their coords specified.\nPlease either specify .xyz/.mol/coord file or enter atoms manually.') 68 print('See manual for supported file formats.') 69 #If both atoms and coordfile are specified then atoms are considered and the coord file is not considered. 70 elif not coordfile is None and not atoms is None: 71 print('Remark: User provided both atoms as well as the coordfile.\nOnly atoms are being used.') 72 elif coordfile is not None and atoms is None: 73 if isinstance(coordfile, str): 74 atoms = self.readCoordsFile(coordfile) 75 else: 76 print('Error: The coordfile argument should be a string with the name of the coord file.') 77 78 79 #Coord file .xyz/.mol/TURBOMOLE coord file/QE input file/Orca input file 80 self.coordfile = coordfile 81 """str or None: Path to the coordinate file used to initialize the molecule. 82 Contains the filename of the original coordinate file (.xyz, .mol, TURBOMOLE coord, etc.) 83 if the molecule was created from a file, or None if created directly from atoms list.""" 84 #Atoms and their coordinates 85 self.atoms = atoms 86 """list: List of atoms with their coordinates in the format [[symbol, x, y, z], ...] where: 87 - symbol: Atomic symbol (str) or atomic number (int) 88 - x, y, z: Cartesian coordinates in Angstroms (float) 89 This is the raw input data used to construct the molecule.""" 90 #Molecular charge 91 self.charge = charge 92 """int: Net charge of the molecule in elementary charge units. 93 Positive values indicate cationic species, negative values indicate anionic species, 94 and 0 indicates a neutral molecule. Used to calculate the total number of electrons.""" 95 #The basis object for the Mol object 96 self.basis = None 97 """Basis or None: Basis set object containing all basis function information for quantum 98 chemical calculations. Initialized as None and populated with a Basis instance that 99 handles basis set assignments for each atom in the molecule.""" 100 101 self.nelectrons = 0 102 """int: Total number of electrons in the molecule. 103 Calculated as the sum of all atomic numbers (nuclear charges) adjusted by the molecular charge: 104 nelectrons = Σ(Z_i) - charge, where Z_i is the atomic number of atom i.""" 105 self.natoms = 0 106 """int: Total number of atoms in the molecule. 107 Incremented during atom validation and used for array dimensioning and iteration 108 over atomic properties.""" 109 self.coords = [] 110 """numpy.ndarray: Atomic coordinates in Angstroms with shape (natoms, 3). 111 Each row contains the [x, y, z] coordinates of one atom. Initialized as empty list 112 and populated during atom validation to become a numpy array.""" 113 self.coordsBohrs = [] 114 """numpy.ndarray: Atomic coordinates converted to Bohr units with shape (natoms, 3). 115 Calculated from self.coords using the conversion factor Data.Angs2BohrFactor. 116 Used for quantum chemical calculations requiring atomic units of length.""" 117 self.atomicSpecies = [] 118 """list of str: List of atomic symbols as strings (e.g., ['H', 'C', 'N', 'O']). 119 Ordered the same as atoms appear in the molecule. Length equals natoms. 120 Used for identifying atom types and output formatting.""" 121 122 self.Zcharges = [] 123 """list of int: List of atomic numbers (nuclear charges) for each atom in the molecule. 124 Corresponds to the number of protons in each atomic nucleus. Used for calculating 125 electronic properties and nuclear contributions to various molecular properties.""" 126 self.success = False 127 """bool: Flag indicating whether the molecule was successfully initialized and validated. 128 Set to True if all atoms have valid symbols/atomic numbers and properly formatted 129 coordinates, False otherwise. Used to prevent operations on invalid molecular data.""" 130 131 self.label = 'Generic mol' 132 """str: Descriptive label or name for the molecule. 133 Defaults to 'Generic mol' but can be customized. Used in output files and for 134 identification purposes when exporting molecular data to various file formats.""" 135 #Validate if the atoms attribute are provided correctly 136 self.success = self.validateAtoms(atoms) 137 if self.success: 138 self.nelectrons = self.nelectrons+charge 139 self.coordsBohrs = self.coords*Data.Angs2BohrFactor 140 #Basis set names for the atoms. 141 #Either just one basis function can be specified for all atoms 142 #Or one can specify a particular basis set for a particular atom. 143 #'basis' is a dictionary specifying the basis set to be used for a particular atom 144 if basis is None: 145 #Default basis set is sto-3g for all atoms 146 self.basis = {'all':Basis.load(mol=self, basis_name='sto-3g')}#{'all','sto-3g'} 147 self.basis = Basis(self, self.basis) 148 else: 149 self.basis = Basis(self, basis) 150 151 152 153 154 155 def validateAtoms(self, atoms): 156 """ 157 Validate atomic symbols and coordinates provided in the atoms list. 158 159 This method checks that atomic symbols are valid (either as strings matching 160 known element symbols or as integers representing atomic numbers), and that 161 coordinates are properly formatted as numerical values. 162 163 Args: 164 atoms (list): List of atoms in format [[symbol, x, y, z], ...] where 165 symbol can be str (element symbol) or int (atomic number), 166 and x, y, z are numerical coordinates. 167 168 Returns: 169 bool: True if all atoms are valid, False otherwise. 170 171 Side Effects: 172 Updates the following instance attributes: 173 - atomicSpecies: List of atomic symbols 174 - Zcharges: List of atomic numbers 175 - nelectrons: Total number of electrons 176 - natoms: Number of atoms 177 - coords: Numpy array of coordinates 178 179 Raises: 180 Prints error messages for invalid atomic symbols, coordinates, or formatting. 181 """ 182 #Validate atomic symbols 183 for i in range(len(atoms)): 184 if isinstance(atoms[i][0], str): 185 elementSymbols = [x.lower() for x in Data.elementSymbols] 186 if atoms[i][0].lower() in elementSymbols: 187 self.atomicSpecies.append(atoms[i][0]) 188 Z = elementSymbols.index(atoms[i][0].lower()) 189 self.Zcharges.append(Z) 190 self.nelectrons = self.nelectrons + Z 191 self.natoms = self.natoms + 1 192 else: 193 print('Error: Unknown atomic symbols found') 194 return False 195 elif isinstance(atoms[i][0], int): 196 if atoms[i][0]<=118 and atoms[i][0]>=0: 197 self.atomicSpecies.append(Data.elementSymbols[atoms[i][0]]) 198 Z = atoms[i][0] 199 self.Zcharges.append(Z) 200 self.nelectrons = self.nelectrons + Z 201 self.natoms = self.natoms + 1 202 else: 203 print('Error: Found atomic number out of the valid range') 204 return False 205 else: 206 print('Error: Found something other than string or integer for atomic symbols/numbers') 207 return False 208 self.coords = np.zeros([self.natoms,3]) 209 #Validate atomic coords 210 for i in range(len(atoms)): 211 if len(atoms[i])==4: 212 #TODO: the following condition doesn't work 213 #FIXME 214 if ( isinstance(coordi, float) for coordi in atoms[i][1:4] ) or ( isinstance(coordi, int) for coordi in atoms[i][1:4]): 215 self.coords[i] = atoms[i][1:4] 216 else: 217 print('Error: Coordinates of atoms should be floats/integers.') 218 return False 219 220 else: 221 print('Error: Illegal definition of coords in atoms. There should be all three x,y,z coordinates.\nSome seem to be missing.') 222 return False 223 224 if self.natoms==self.coords.shape[0]: 225 return True 226 else: 227 print('Error: Some definitions of atomic symbols/coords were illegal') 228 229 230 def get_center_of_charge(self, units='angs'): 231 """ 232 Calculate the center of charge (weighted by atomic numbers) of the molecule. 233 234 The center of charge is computed as the weighted average of atomic positions, 235 where the weights are the atomic numbers (nuclear charges) of each atom. 236 237 Returns: 238 numpy.ndarray or None: 3D coordinates of the center of charge in Angstroms. 239 Returns None if the molecule was not successfully initialized. 240 241 Formula: 242 center_of_charge = Σ(Z_i * r_i) / Σ(Z_i) 243 where Z_i is the atomic number and r_i is the position of atom i. 244 """ 245 if not self.success: 246 print("Error: Mol object is not successfully initialized.") 247 return None 248 249 # Use NumPy for efficient calculations 250 total_charge = np.sum(self.Zcharges) 251 if units=='angs': 252 coc = np.dot(self.Zcharges, self.coords) / total_charge 253 else: 254 coc = np.dot(self.Zcharges, self.coordsBohrs) / total_charge 255 256 return coc 257 # # Use NumPy for efficient calculations 258 # total_mass = np.sum(self.Zcharges) 259 # com_x = np.sum(self.Zcharges * self.coords[:, 0]) / total_mass 260 # com_y = np.sum(self.Zcharges * self.coords[:, 1]) / total_mass 261 # com_z = np.sum(self.Zcharges * self.coords[:, 2]) / total_mass 262 263 # com = np.array([com_x, com_y, com_z]) 264 # return com 265 266 def get_nuc_dip_moment(self): 267 """ 268 Calculate the nuclear contribution to the molecular dipole moment. 269 270 The nuclear dipole moment is computed as the sum of nuclear charges 271 multiplied by their positions in atomic units (Bohr). 272 273 Returns: 274 numpy.ndarray: 3D vector representing the nuclear dipole moment 275 in atomic units (e⋅a₀), where each component corresponds 276 to x, y, z directions. 277 278 Formula: 279 μ_nuc = Σ(Z_i * r_i) 280 where Z_i is the nuclear charge and r_i is the position in Bohr units. 281 """ 282 charges = self.Zcharges 283 coords = self.coordsBohrs 284 nuc_dip = np.einsum('i,ix->x', charges, coords) 285 return nuc_dip 286 287 def get_elec_dip_moment(self, dipole_moment_matrix, dmat): 288 """ 289 Calculate the electronic contribution to the molecular dipole moment. 290 291 The electronic dipole moment is computed using the dipole moment integrals 292 and the density matrix from quantum chemical calculations. 293 294 Args: 295 dipole_moment_matrix (numpy.ndarray): 3D array of dipole moment integrals 296 with shape (3, nbasis, nbasis), where 297 the first dimension corresponds to x, y, z. 298 dmat (numpy.ndarray): Density matrix with shape (nbasis, nbasis). 299 300 Returns: 301 numpy.ndarray: 3D vector representing the electronic dipole moment 302 in atomic units (e⋅a₀), where each component corresponds 303 to x, y, z directions. 304 305 Formula: 306 μ_elec = Σ_μν P_μν ⟨μ|r|ν⟩ 307 where P_μν is the density matrix and ⟨μ|r|ν⟩ are dipole integrals. 308 """ 309 elec_dip = np.einsum('xij,ji->x', dipole_moment_matrix, dmat).real 310 return elec_dip 311 312 def get_dipole_moment(self, dipole_moment_matrix, dmat): 313 """ 314 Calculate the total molecular dipole moment. 315 316 The total dipole moment is computed as the difference between nuclear 317 and electronic contributions: μ_total = μ_nuclear - μ_electronic 318 319 Args: 320 dipole_moment_matrix (numpy.ndarray): 3D array of dipole moment integrals 321 with shape (3, nbasis, nbasis). 322 dmat (numpy.ndarray): Density matrix from quantum chemical calculation 323 with shape (nbasis, nbasis). 324 325 Returns: 326 numpy.ndarray: 3D vector representing the total molecular dipole moment 327 in atomic units (e⋅a₀). Each component corresponds to 328 x, y, z directions. 329 330 Note: 331 The sign convention follows: μ_total = μ_nuclear - μ_electronic 332 This gives the dipole moment vector pointing from negative to positive charge. 333 334 TODO: Allow user to specify output units (e.g., Debye). 335 """ 336 nuc_dip = self.get_nuc_dip_moment() 337 elec_dip = self.get_elec_dip_moment(dipole_moment_matrix, dmat) 338 mol_dip = nuc_dip - elec_dip 339 # TODO: allow the user to specify units (ex: DEBYE) 340 return mol_dip 341 342 #Export coord file in TURBOMOLE format 343 def exportCoords(atomicSpecies, coords, filename='coord'): 344 """ 345 Export molecular coordinates to TURBOMOLE format. 346 347 Creates a coordinate file in TURBOMOLE format with atomic positions 348 converted from Angstroms to Bohr units. 349 350 Args: 351 atomicSpecies (list): List of atomic symbols as strings. 352 coords (numpy.ndarray): Atomic coordinates in Angstroms with shape (natoms, 3). 353 filename (str, optional): Output filename. Defaults to 'coord'. 354 355 File Format: 356 $coord 357 x_bohr y_bohr z_bohr element_symbol 358 ... 359 $end 360 361 Note: 362 This is a static method that should be called as a class method. 363 Coordinates are automatically converted from Angstroms to Bohr units. 364 365 Warning: 366 This method contains a bug - it references undefined variable 'coord' 367 instead of 'coords'. Should be fixed in implementation. 368 """ 369 file = open(filename,'w') 370 file.write('$coord\n') 371 for i in range(len(atomicSpecies)): 372 file.write(str(coord[i,0]*Data.Angs2BohrFactor)+'\t'+str(coord[i,1]*Data.Angs2BohrFactor)+'\t'+str(coord[i,2]*Data.Angs2BohrFactor)+'\t'+atomicSpecies[i].lower()+'\n') 373 file.write('$end\n') 374 file.close() 375 376 #Export the geometry in XYZ format 377 def exportXYZ(self, filename='coord', label='Generic Mol generated by CrysX (Python Library)'): 378 """ 379 Export molecular geometry to XYZ format file. 380 381 Creates a standard XYZ format file with atomic coordinates in Angstroms. 382 This method uses the molecule's current atomic species and coordinates. 383 384 Args: 385 filename (str, optional): Base filename without extension. Defaults to 'coord'. 386 The '.xyz' extension will be automatically added. 387 label (str, optional): Comment line for the XYZ file. Defaults to 388 'Generic Mol generated by CrysX (Python Library)'. 389 390 File Format: 391 number_of_atoms 392 comment_line 393 element_symbol x_coord y_coord z_coord 394 ... 395 396 Output: 397 Creates a file named '{filename}.xyz' in the current directory. 398 399 Example: 400 mol.exportXYZ('water', 'H2O molecule') 401 # Creates 'water.xyz' with H2O coordinates 402 """ 403 file = open(filename+'.xyz','w') 404 file.write(str(self.natoms)+'\n') 405 file.write(label+'\n') 406 for i in range(self.natoms): 407 file.write(self.atomicSpecies[i]+'\t'+str(self.coords[i,0])+'\t'+str(self.coords[i,1])+'\t'+str(self.coords[i,2])+'\n') 408 file.close() 409 #Export the geometry in XYZ format 410 def exportXYZs(atomicSpecies, coords, filename='coord', label='Generic Mol generated by CrysX (Python Library)'): 411 """ 412 Export molecular geometry to XYZ format (static method version). 413 414 Static method to create XYZ files from provided atomic species and coordinates, 415 without requiring a Mol object instance. 416 417 Args: 418 atomicSpecies (list): List of atomic symbols as strings. 419 coords (numpy.ndarray): Atomic coordinates in Angstroms with shape (natoms, 3). 420 filename (str, optional): Base filename without extension. Defaults to 'coord'. 421 The '.xyz' extension will be automatically added. 422 label (str, optional): Comment line for the XYZ file. Defaults to 423 'Generic Mol generated by CrysX (Python Library)'. 424 425 File Format: 426 number_of_atoms 427 comment_line 428 element_symbol x_coord y_coord z_coord 429 ... 430 431 Warning: 432 This method contains a bug - it references undefined variable 'coord' 433 instead of 'coords'. Should be fixed in implementation. 434 435 Note: 436 This is a static method and should be called as a class method. 437 """ 438 file = open(filename+'.xyz','w') 439 file.write(str(len(atomicSpecies))+'\n') 440 file.write(label+'\n') 441 for i in range(len(atomicSpecies)): 442 file.write(atomicSpecies[i]+'\t'+str(coord[i,0])+'\t'+str(coord[i,1])+'\t'+str(coord[i,2])+'\n') 443 file.close() 444 445 # Read geometry from TURBOOLE coords file 446 def readCoordsFile(self, filename): 447 """ 448 Read molecular coordinates from various file formats. 449 450 Currently supports XYZ format files. This method serves as a dispatcher 451 to format-specific reading methods based on file extension. 452 453 Args: 454 filename (str): Path to the coordinate file. File format is determined 455 by the file extension. 456 457 Returns: 458 list: List of atoms in the format [[symbol, x, y, z], ...] where 459 symbol is the atomic symbol and x, y, z are coordinates in Angstroms. 460 461 Supported Formats: 462 - .xyz: Standard XYZ coordinate format 463 464 Future Extensions: 465 - .mol: MOL file format 466 - coord: TURBOMOLE coordinate format 467 - Other quantum chemistry input formats 468 469 Raises: 470 Prints error message if file format is not supported. 471 """ 472 if filename.endswith('.xyz'): 473 atoms = self.readXYZ(filename) 474 else: 475 print('Only xyz files for now') 476 return atoms 477 #Read geometry information from a .xyz file 478 def readXYZ(self, filename): 479 """ 480 Read molecular geometry from an XYZ format file. 481 482 Parses a standard XYZ file and extracts atomic symbols and coordinates. 483 The XYZ format consists of: 484 - Line 1: Number of atoms 485 - Line 2: Comment line (ignored) 486 - Lines 3+: atomic_symbol x_coordinate y_coordinate z_coordinate 487 488 Args: 489 filename (str): Path to the XYZ file to read. 490 491 Returns: 492 list: List of atoms in format [[symbol, x, y, z], ...] where: 493 - symbol (str): Atomic symbol (e.g., 'H', 'C', 'O') 494 - x, y, z (float): Coordinates in Angstroms 495 496 File Format Expected: 497 number_of_atoms 498 comment_line 499 element_symbol x_coord y_coord z_coord 500 element_symbol x_coord y_coord z_coord 501 ... 502 503 Error Handling: 504 - Prints error if the number of atoms read doesn't match the declared count 505 - Assumes coordinates are separated by whitespace 506 - Strips whitespace from all input 507 508 Example: 509 For a file 'water.xyz': 510 3 511 Water molecule 512 O 0.000000 0.000000 0.000000 513 H 0.000000 0.000000 0.970000 514 H 0.944863 0.000000 -0.242498 515 516 Returns: [['O', 0.0, 0.0, 0.0], ['H', 0.0, 0.0, 0.97], ['H', 0.944863, 0.0, -0.242498]] 517 """ 518 with open(filename, "r") as f: 519 lines = f.readlines() 520 521 if len(lines) < 2: 522 raise ValueError(f"File {filename} does not contain enough lines for XYZ format") 523 524 # Read number of atoms from first line 525 try: 526 natoms = int(lines[0].strip()) 527 except ValueError: 528 raise ValueError(f"First line of {filename} must contain an integer atom count") 529 530 # The second line is the comment (can be blank) 531 comment_line = lines[1].rstrip("\n") 532 533 atoms = [] 534 for i, line in enumerate(lines[2:2 + natoms], start=3): 535 parts = line.split() 536 if len(parts) < 4: 537 raise ValueError(f"Line {i} in {filename} does not have 4 columns: {line.strip()}") 538 symbol = parts[0] 539 try: 540 x, y, z = map(float, parts[1:4]) 541 except ValueError: 542 raise ValueError(f"Invalid coordinates on line {i} in {filename}: {parts[1:4]}") 543 atoms.append([symbol, x, y, z]) 544 545 if len(atoms) != natoms: 546 raise ValueError( 547 f"Expected {natoms} atoms, but found {len(atoms)} in {filename}" 548 ) 549 550 return atoms
Class to store molecular properties and handle molecular structure operations.
This class provides functionality for creating, manipulating, and exporting molecular structures. It supports reading from various coordinate file formats and automatically generates basis set information.
Attributes: coordfile (str): Path to coordinate file atoms (list): List of atoms with coordinates charge (int): Molecular charge basis (Basis): Basis set object for the molecule nelectrons (int): Number of electrons natoms (int): Number of atoms coords (numpy.ndarray): Atomic coordinates in Angstroms coordsBohrs (numpy.ndarray): Atomic coordinates in Bohr units atomicSpecies (list): List of atomic symbols Zcharges (list): List of atomic numbers success (bool): Whether molecule was initialized successfully label (str): Molecular label/description
44 def __init__(self, atoms=None, coordfile=None, charge=0, basis=None): 45 """ 46 Initialize a Mol object with atomic coordinates and properties. 47 48 Args: 49 atoms (list, optional): List of atoms in the format [[symbol, x, y, z], ...] 50 where symbol can be atomic symbol (str) or atomic number (int), 51 and x, y, z are coordinates in Angstroms. 52 coordfile (str, optional): Path to coordinate file (.xyz format supported). 53 charge (int, optional): Molecular charge. Defaults to 0. 54 basis (dict, optional): Basis set specification. If None, defaults to STO-3G 55 for all atoms. 56 57 Raises: 58 ValueError: If neither atoms nor coordfile is provided. 59 TypeError: If coordfile is not a string. 60 61 Note: 62 If both atoms and coordfile are provided, atoms takes precedence and 63 coordfile is ignored. 64 """ 65 #If no atoms or coordfile is specified. 66 if atoms is None and coordfile is None: 67 print('Error: No atoms and their coords specified.\nPlease either specify .xyz/.mol/coord file or enter atoms manually.') 68 print('See manual for supported file formats.') 69 #If both atoms and coordfile are specified then atoms are considered and the coord file is not considered. 70 elif not coordfile is None and not atoms is None: 71 print('Remark: User provided both atoms as well as the coordfile.\nOnly atoms are being used.') 72 elif coordfile is not None and atoms is None: 73 if isinstance(coordfile, str): 74 atoms = self.readCoordsFile(coordfile) 75 else: 76 print('Error: The coordfile argument should be a string with the name of the coord file.') 77 78 79 #Coord file .xyz/.mol/TURBOMOLE coord file/QE input file/Orca input file 80 self.coordfile = coordfile 81 """str or None: Path to the coordinate file used to initialize the molecule. 82 Contains the filename of the original coordinate file (.xyz, .mol, TURBOMOLE coord, etc.) 83 if the molecule was created from a file, or None if created directly from atoms list.""" 84 #Atoms and their coordinates 85 self.atoms = atoms 86 """list: List of atoms with their coordinates in the format [[symbol, x, y, z], ...] where: 87 - symbol: Atomic symbol (str) or atomic number (int) 88 - x, y, z: Cartesian coordinates in Angstroms (float) 89 This is the raw input data used to construct the molecule.""" 90 #Molecular charge 91 self.charge = charge 92 """int: Net charge of the molecule in elementary charge units. 93 Positive values indicate cationic species, negative values indicate anionic species, 94 and 0 indicates a neutral molecule. Used to calculate the total number of electrons.""" 95 #The basis object for the Mol object 96 self.basis = None 97 """Basis or None: Basis set object containing all basis function information for quantum 98 chemical calculations. Initialized as None and populated with a Basis instance that 99 handles basis set assignments for each atom in the molecule.""" 100 101 self.nelectrons = 0 102 """int: Total number of electrons in the molecule. 103 Calculated as the sum of all atomic numbers (nuclear charges) adjusted by the molecular charge: 104 nelectrons = Σ(Z_i) - charge, where Z_i is the atomic number of atom i.""" 105 self.natoms = 0 106 """int: Total number of atoms in the molecule. 107 Incremented during atom validation and used for array dimensioning and iteration 108 over atomic properties.""" 109 self.coords = [] 110 """numpy.ndarray: Atomic coordinates in Angstroms with shape (natoms, 3). 111 Each row contains the [x, y, z] coordinates of one atom. Initialized as empty list 112 and populated during atom validation to become a numpy array.""" 113 self.coordsBohrs = [] 114 """numpy.ndarray: Atomic coordinates converted to Bohr units with shape (natoms, 3). 115 Calculated from self.coords using the conversion factor Data.Angs2BohrFactor. 116 Used for quantum chemical calculations requiring atomic units of length.""" 117 self.atomicSpecies = [] 118 """list of str: List of atomic symbols as strings (e.g., ['H', 'C', 'N', 'O']). 119 Ordered the same as atoms appear in the molecule. Length equals natoms. 120 Used for identifying atom types and output formatting.""" 121 122 self.Zcharges = [] 123 """list of int: List of atomic numbers (nuclear charges) for each atom in the molecule. 124 Corresponds to the number of protons in each atomic nucleus. Used for calculating 125 electronic properties and nuclear contributions to various molecular properties.""" 126 self.success = False 127 """bool: Flag indicating whether the molecule was successfully initialized and validated. 128 Set to True if all atoms have valid symbols/atomic numbers and properly formatted 129 coordinates, False otherwise. Used to prevent operations on invalid molecular data.""" 130 131 self.label = 'Generic mol' 132 """str: Descriptive label or name for the molecule. 133 Defaults to 'Generic mol' but can be customized. Used in output files and for 134 identification purposes when exporting molecular data to various file formats.""" 135 #Validate if the atoms attribute are provided correctly 136 self.success = self.validateAtoms(atoms) 137 if self.success: 138 self.nelectrons = self.nelectrons+charge 139 self.coordsBohrs = self.coords*Data.Angs2BohrFactor 140 #Basis set names for the atoms. 141 #Either just one basis function can be specified for all atoms 142 #Or one can specify a particular basis set for a particular atom. 143 #'basis' is a dictionary specifying the basis set to be used for a particular atom 144 if basis is None: 145 #Default basis set is sto-3g for all atoms 146 self.basis = {'all':Basis.load(mol=self, basis_name='sto-3g')}#{'all','sto-3g'} 147 self.basis = Basis(self, self.basis) 148 else: 149 self.basis = Basis(self, basis)
Initialize a Mol object with atomic coordinates and properties.
Args: atoms (list, optional): List of atoms in the format [[symbol, x, y, z], ...] where symbol can be atomic symbol (str) or atomic number (int), and x, y, z are coordinates in Angstroms. coordfile (str, optional): Path to coordinate file (.xyz format supported). charge (int, optional): Molecular charge. Defaults to 0. basis (dict, optional): Basis set specification. If None, defaults to STO-3G for all atoms.
Raises: ValueError: If neither atoms nor coordfile is provided. TypeError: If coordfile is not a string.
Note: If both atoms and coordfile are provided, atoms takes precedence and coordfile is ignored.
str or None: Path to the coordinate file used to initialize the molecule. Contains the filename of the original coordinate file (.xyz, .mol, TURBOMOLE coord, etc.) if the molecule was created from a file, or None if created directly from atoms list.
list: List of atoms with their coordinates in the format [[symbol, x, y, z], ...] where:
- symbol: Atomic symbol (str) or atomic number (int)
- x, y, z: Cartesian coordinates in Angstroms (float) This is the raw input data used to construct the molecule.
int: Net charge of the molecule in elementary charge units. Positive values indicate cationic species, negative values indicate anionic species, and 0 indicates a neutral molecule. Used to calculate the total number of electrons.
Basis or None: Basis set object containing all basis function information for quantum chemical calculations. Initialized as None and populated with a Basis instance that handles basis set assignments for each atom in the molecule.
int: Total number of electrons in the molecule. Calculated as the sum of all atomic numbers (nuclear charges) adjusted by the molecular charge: nelectrons = Σ(Z_i) - charge, where Z_i is the atomic number of atom i.
int: Total number of atoms in the molecule. Incremented during atom validation and used for array dimensioning and iteration over atomic properties.
numpy.ndarray: Atomic coordinates in Angstroms with shape (natoms, 3). Each row contains the [x, y, z] coordinates of one atom. Initialized as empty list and populated during atom validation to become a numpy array.
numpy.ndarray: Atomic coordinates converted to Bohr units with shape (natoms, 3). Calculated from self.coords using the conversion factor Data.Angs2BohrFactor. Used for quantum chemical calculations requiring atomic units of length.
list of str: List of atomic symbols as strings (e.g., ['H', 'C', 'N', 'O']). Ordered the same as atoms appear in the molecule. Length equals natoms. Used for identifying atom types and output formatting.
list of int: List of atomic numbers (nuclear charges) for each atom in the molecule. Corresponds to the number of protons in each atomic nucleus. Used for calculating electronic properties and nuclear contributions to various molecular properties.
bool: Flag indicating whether the molecule was successfully initialized and validated. Set to True if all atoms have valid symbols/atomic numbers and properly formatted coordinates, False otherwise. Used to prevent operations on invalid molecular data.
str: Descriptive label or name for the molecule. Defaults to 'Generic mol' but can be customized. Used in output files and for identification purposes when exporting molecular data to various file formats.
155 def validateAtoms(self, atoms): 156 """ 157 Validate atomic symbols and coordinates provided in the atoms list. 158 159 This method checks that atomic symbols are valid (either as strings matching 160 known element symbols or as integers representing atomic numbers), and that 161 coordinates are properly formatted as numerical values. 162 163 Args: 164 atoms (list): List of atoms in format [[symbol, x, y, z], ...] where 165 symbol can be str (element symbol) or int (atomic number), 166 and x, y, z are numerical coordinates. 167 168 Returns: 169 bool: True if all atoms are valid, False otherwise. 170 171 Side Effects: 172 Updates the following instance attributes: 173 - atomicSpecies: List of atomic symbols 174 - Zcharges: List of atomic numbers 175 - nelectrons: Total number of electrons 176 - natoms: Number of atoms 177 - coords: Numpy array of coordinates 178 179 Raises: 180 Prints error messages for invalid atomic symbols, coordinates, or formatting. 181 """ 182 #Validate atomic symbols 183 for i in range(len(atoms)): 184 if isinstance(atoms[i][0], str): 185 elementSymbols = [x.lower() for x in Data.elementSymbols] 186 if atoms[i][0].lower() in elementSymbols: 187 self.atomicSpecies.append(atoms[i][0]) 188 Z = elementSymbols.index(atoms[i][0].lower()) 189 self.Zcharges.append(Z) 190 self.nelectrons = self.nelectrons + Z 191 self.natoms = self.natoms + 1 192 else: 193 print('Error: Unknown atomic symbols found') 194 return False 195 elif isinstance(atoms[i][0], int): 196 if atoms[i][0]<=118 and atoms[i][0]>=0: 197 self.atomicSpecies.append(Data.elementSymbols[atoms[i][0]]) 198 Z = atoms[i][0] 199 self.Zcharges.append(Z) 200 self.nelectrons = self.nelectrons + Z 201 self.natoms = self.natoms + 1 202 else: 203 print('Error: Found atomic number out of the valid range') 204 return False 205 else: 206 print('Error: Found something other than string or integer for atomic symbols/numbers') 207 return False 208 self.coords = np.zeros([self.natoms,3]) 209 #Validate atomic coords 210 for i in range(len(atoms)): 211 if len(atoms[i])==4: 212 #TODO: the following condition doesn't work 213 #FIXME 214 if ( isinstance(coordi, float) for coordi in atoms[i][1:4] ) or ( isinstance(coordi, int) for coordi in atoms[i][1:4]): 215 self.coords[i] = atoms[i][1:4] 216 else: 217 print('Error: Coordinates of atoms should be floats/integers.') 218 return False 219 220 else: 221 print('Error: Illegal definition of coords in atoms. There should be all three x,y,z coordinates.\nSome seem to be missing.') 222 return False 223 224 if self.natoms==self.coords.shape[0]: 225 return True 226 else: 227 print('Error: Some definitions of atomic symbols/coords were illegal')
Validate atomic symbols and coordinates provided in the atoms list.
This method checks that atomic symbols are valid (either as strings matching known element symbols or as integers representing atomic numbers), and that coordinates are properly formatted as numerical values.
Args: atoms (list): List of atoms in format [[symbol, x, y, z], ...] where symbol can be str (element symbol) or int (atomic number), and x, y, z are numerical coordinates.
Returns: bool: True if all atoms are valid, False otherwise.
Side Effects: Updates the following instance attributes: - atomicSpecies: List of atomic symbols - Zcharges: List of atomic numbers - nelectrons: Total number of electrons - natoms: Number of atoms - coords: Numpy array of coordinates
Raises: Prints error messages for invalid atomic symbols, coordinates, or formatting.
230 def get_center_of_charge(self, units='angs'): 231 """ 232 Calculate the center of charge (weighted by atomic numbers) of the molecule. 233 234 The center of charge is computed as the weighted average of atomic positions, 235 where the weights are the atomic numbers (nuclear charges) of each atom. 236 237 Returns: 238 numpy.ndarray or None: 3D coordinates of the center of charge in Angstroms. 239 Returns None if the molecule was not successfully initialized. 240 241 Formula: 242 center_of_charge = Σ(Z_i * r_i) / Σ(Z_i) 243 where Z_i is the atomic number and r_i is the position of atom i. 244 """ 245 if not self.success: 246 print("Error: Mol object is not successfully initialized.") 247 return None 248 249 # Use NumPy for efficient calculations 250 total_charge = np.sum(self.Zcharges) 251 if units=='angs': 252 coc = np.dot(self.Zcharges, self.coords) / total_charge 253 else: 254 coc = np.dot(self.Zcharges, self.coordsBohrs) / total_charge 255 256 return coc 257 # # Use NumPy for efficient calculations 258 # total_mass = np.sum(self.Zcharges) 259 # com_x = np.sum(self.Zcharges * self.coords[:, 0]) / total_mass 260 # com_y = np.sum(self.Zcharges * self.coords[:, 1]) / total_mass 261 # com_z = np.sum(self.Zcharges * self.coords[:, 2]) / total_mass 262 263 # com = np.array([com_x, com_y, com_z]) 264 # return com
Calculate the center of charge (weighted by atomic numbers) of the molecule.
The center of charge is computed as the weighted average of atomic positions, where the weights are the atomic numbers (nuclear charges) of each atom.
Returns: numpy.ndarray or None: 3D coordinates of the center of charge in Angstroms. Returns None if the molecule was not successfully initialized.
Formula: center_of_charge = Σ(Z_i * r_i) / Σ(Z_i) where Z_i is the atomic number and r_i is the position of atom i.
266 def get_nuc_dip_moment(self): 267 """ 268 Calculate the nuclear contribution to the molecular dipole moment. 269 270 The nuclear dipole moment is computed as the sum of nuclear charges 271 multiplied by their positions in atomic units (Bohr). 272 273 Returns: 274 numpy.ndarray: 3D vector representing the nuclear dipole moment 275 in atomic units (e⋅a₀), where each component corresponds 276 to x, y, z directions. 277 278 Formula: 279 μ_nuc = Σ(Z_i * r_i) 280 where Z_i is the nuclear charge and r_i is the position in Bohr units. 281 """ 282 charges = self.Zcharges 283 coords = self.coordsBohrs 284 nuc_dip = np.einsum('i,ix->x', charges, coords) 285 return nuc_dip
Calculate the nuclear contribution to the molecular dipole moment.
The nuclear dipole moment is computed as the sum of nuclear charges multiplied by their positions in atomic units (Bohr).
Returns: numpy.ndarray: 3D vector representing the nuclear dipole moment in atomic units (e⋅a₀), where each component corresponds to x, y, z directions.
Formula: μ_nuc = Σ(Z_i * r_i) where Z_i is the nuclear charge and r_i is the position in Bohr units.
287 def get_elec_dip_moment(self, dipole_moment_matrix, dmat): 288 """ 289 Calculate the electronic contribution to the molecular dipole moment. 290 291 The electronic dipole moment is computed using the dipole moment integrals 292 and the density matrix from quantum chemical calculations. 293 294 Args: 295 dipole_moment_matrix (numpy.ndarray): 3D array of dipole moment integrals 296 with shape (3, nbasis, nbasis), where 297 the first dimension corresponds to x, y, z. 298 dmat (numpy.ndarray): Density matrix with shape (nbasis, nbasis). 299 300 Returns: 301 numpy.ndarray: 3D vector representing the electronic dipole moment 302 in atomic units (e⋅a₀), where each component corresponds 303 to x, y, z directions. 304 305 Formula: 306 μ_elec = Σ_μν P_μν ⟨μ|r|ν⟩ 307 where P_μν is the density matrix and ⟨μ|r|ν⟩ are dipole integrals. 308 """ 309 elec_dip = np.einsum('xij,ji->x', dipole_moment_matrix, dmat).real 310 return elec_dip
Calculate the electronic contribution to the molecular dipole moment.
The electronic dipole moment is computed using the dipole moment integrals and the density matrix from quantum chemical calculations.
Args: dipole_moment_matrix (numpy.ndarray): 3D array of dipole moment integrals with shape (3, nbasis, nbasis), where the first dimension corresponds to x, y, z. dmat (numpy.ndarray): Density matrix with shape (nbasis, nbasis).
Returns: numpy.ndarray: 3D vector representing the electronic dipole moment in atomic units (e⋅a₀), where each component corresponds to x, y, z directions.
Formula: μ_elec = Σ_μν P_μν ⟨μ|r|ν⟩ where P_μν is the density matrix and ⟨μ|r|ν⟩ are dipole integrals.
312 def get_dipole_moment(self, dipole_moment_matrix, dmat): 313 """ 314 Calculate the total molecular dipole moment. 315 316 The total dipole moment is computed as the difference between nuclear 317 and electronic contributions: μ_total = μ_nuclear - μ_electronic 318 319 Args: 320 dipole_moment_matrix (numpy.ndarray): 3D array of dipole moment integrals 321 with shape (3, nbasis, nbasis). 322 dmat (numpy.ndarray): Density matrix from quantum chemical calculation 323 with shape (nbasis, nbasis). 324 325 Returns: 326 numpy.ndarray: 3D vector representing the total molecular dipole moment 327 in atomic units (e⋅a₀). Each component corresponds to 328 x, y, z directions. 329 330 Note: 331 The sign convention follows: μ_total = μ_nuclear - μ_electronic 332 This gives the dipole moment vector pointing from negative to positive charge. 333 334 TODO: Allow user to specify output units (e.g., Debye). 335 """ 336 nuc_dip = self.get_nuc_dip_moment() 337 elec_dip = self.get_elec_dip_moment(dipole_moment_matrix, dmat) 338 mol_dip = nuc_dip - elec_dip 339 # TODO: allow the user to specify units (ex: DEBYE) 340 return mol_dip
Calculate the total molecular dipole moment.
The total dipole moment is computed as the difference between nuclear and electronic contributions: μ_total = μ_nuclear - μ_electronic
Args: dipole_moment_matrix (numpy.ndarray): 3D array of dipole moment integrals with shape (3, nbasis, nbasis). dmat (numpy.ndarray): Density matrix from quantum chemical calculation with shape (nbasis, nbasis).
Returns: numpy.ndarray: 3D vector representing the total molecular dipole moment in atomic units (e⋅a₀). Each component corresponds to x, y, z directions.
Note: The sign convention follows: μ_total = μ_nuclear - μ_electronic This gives the dipole moment vector pointing from negative to positive charge.
TODO: Allow user to specify output units (e.g., Debye).
343 def exportCoords(atomicSpecies, coords, filename='coord'): 344 """ 345 Export molecular coordinates to TURBOMOLE format. 346 347 Creates a coordinate file in TURBOMOLE format with atomic positions 348 converted from Angstroms to Bohr units. 349 350 Args: 351 atomicSpecies (list): List of atomic symbols as strings. 352 coords (numpy.ndarray): Atomic coordinates in Angstroms with shape (natoms, 3). 353 filename (str, optional): Output filename. Defaults to 'coord'. 354 355 File Format: 356 $coord 357 x_bohr y_bohr z_bohr element_symbol 358 ... 359 $end 360 361 Note: 362 This is a static method that should be called as a class method. 363 Coordinates are automatically converted from Angstroms to Bohr units. 364 365 Warning: 366 This method contains a bug - it references undefined variable 'coord' 367 instead of 'coords'. Should be fixed in implementation. 368 """ 369 file = open(filename,'w') 370 file.write('$coord\n') 371 for i in range(len(atomicSpecies)): 372 file.write(str(coord[i,0]*Data.Angs2BohrFactor)+'\t'+str(coord[i,1]*Data.Angs2BohrFactor)+'\t'+str(coord[i,2]*Data.Angs2BohrFactor)+'\t'+atomicSpecies[i].lower()+'\n') 373 file.write('$end\n') 374 file.close()
Export molecular coordinates to TURBOMOLE format.
Creates a coordinate file in TURBOMOLE format with atomic positions converted from Angstroms to Bohr units.
Args: atomicSpecies (list): List of atomic symbols as strings. coords (numpy.ndarray): Atomic coordinates in Angstroms with shape (natoms, 3). filename (str, optional): Output filename. Defaults to 'coord'.
File Format: $coord x_bohr y_bohr z_bohr element_symbol ... $end
Note: This is a static method that should be called as a class method. Coordinates are automatically converted from Angstroms to Bohr units.
Warning: This method contains a bug - it references undefined variable 'coord' instead of 'coords'. Should be fixed in implementation.
377 def exportXYZ(self, filename='coord', label='Generic Mol generated by CrysX (Python Library)'): 378 """ 379 Export molecular geometry to XYZ format file. 380 381 Creates a standard XYZ format file with atomic coordinates in Angstroms. 382 This method uses the molecule's current atomic species and coordinates. 383 384 Args: 385 filename (str, optional): Base filename without extension. Defaults to 'coord'. 386 The '.xyz' extension will be automatically added. 387 label (str, optional): Comment line for the XYZ file. Defaults to 388 'Generic Mol generated by CrysX (Python Library)'. 389 390 File Format: 391 number_of_atoms 392 comment_line 393 element_symbol x_coord y_coord z_coord 394 ... 395 396 Output: 397 Creates a file named '{filename}.xyz' in the current directory. 398 399 Example: 400 mol.exportXYZ('water', 'H2O molecule') 401 # Creates 'water.xyz' with H2O coordinates 402 """ 403 file = open(filename+'.xyz','w') 404 file.write(str(self.natoms)+'\n') 405 file.write(label+'\n') 406 for i in range(self.natoms): 407 file.write(self.atomicSpecies[i]+'\t'+str(self.coords[i,0])+'\t'+str(self.coords[i,1])+'\t'+str(self.coords[i,2])+'\n') 408 file.close()
Export molecular geometry to XYZ format file.
Creates a standard XYZ format file with atomic coordinates in Angstroms. This method uses the molecule's current atomic species and coordinates.
Args: filename (str, optional): Base filename without extension. Defaults to 'coord'. The '.xyz' extension will be automatically added. label (str, optional): Comment line for the XYZ file. Defaults to 'Generic Mol generated by CrysX (Python Library)'.
File Format: number_of_atoms comment_line element_symbol x_coord y_coord z_coord ...
Output: Creates a file named '{filename}.xyz' in the current directory.
Example: mol.exportXYZ('water', 'H2O molecule') # Creates 'water.xyz' with H2O coordinates
410 def exportXYZs(atomicSpecies, coords, filename='coord', label='Generic Mol generated by CrysX (Python Library)'): 411 """ 412 Export molecular geometry to XYZ format (static method version). 413 414 Static method to create XYZ files from provided atomic species and coordinates, 415 without requiring a Mol object instance. 416 417 Args: 418 atomicSpecies (list): List of atomic symbols as strings. 419 coords (numpy.ndarray): Atomic coordinates in Angstroms with shape (natoms, 3). 420 filename (str, optional): Base filename without extension. Defaults to 'coord'. 421 The '.xyz' extension will be automatically added. 422 label (str, optional): Comment line for the XYZ file. Defaults to 423 'Generic Mol generated by CrysX (Python Library)'. 424 425 File Format: 426 number_of_atoms 427 comment_line 428 element_symbol x_coord y_coord z_coord 429 ... 430 431 Warning: 432 This method contains a bug - it references undefined variable 'coord' 433 instead of 'coords'. Should be fixed in implementation. 434 435 Note: 436 This is a static method and should be called as a class method. 437 """ 438 file = open(filename+'.xyz','w') 439 file.write(str(len(atomicSpecies))+'\n') 440 file.write(label+'\n') 441 for i in range(len(atomicSpecies)): 442 file.write(atomicSpecies[i]+'\t'+str(coord[i,0])+'\t'+str(coord[i,1])+'\t'+str(coord[i,2])+'\n') 443 file.close()
Export molecular geometry to XYZ format (static method version).
Static method to create XYZ files from provided atomic species and coordinates, without requiring a Mol object instance.
Args: atomicSpecies (list): List of atomic symbols as strings. coords (numpy.ndarray): Atomic coordinates in Angstroms with shape (natoms, 3). filename (str, optional): Base filename without extension. Defaults to 'coord'. The '.xyz' extension will be automatically added. label (str, optional): Comment line for the XYZ file. Defaults to 'Generic Mol generated by CrysX (Python Library)'.
File Format: number_of_atoms comment_line element_symbol x_coord y_coord z_coord ...
Warning: This method contains a bug - it references undefined variable 'coord' instead of 'coords'. Should be fixed in implementation.
Note: This is a static method and should be called as a class method.
446 def readCoordsFile(self, filename): 447 """ 448 Read molecular coordinates from various file formats. 449 450 Currently supports XYZ format files. This method serves as a dispatcher 451 to format-specific reading methods based on file extension. 452 453 Args: 454 filename (str): Path to the coordinate file. File format is determined 455 by the file extension. 456 457 Returns: 458 list: List of atoms in the format [[symbol, x, y, z], ...] where 459 symbol is the atomic symbol and x, y, z are coordinates in Angstroms. 460 461 Supported Formats: 462 - .xyz: Standard XYZ coordinate format 463 464 Future Extensions: 465 - .mol: MOL file format 466 - coord: TURBOMOLE coordinate format 467 - Other quantum chemistry input formats 468 469 Raises: 470 Prints error message if file format is not supported. 471 """ 472 if filename.endswith('.xyz'): 473 atoms = self.readXYZ(filename) 474 else: 475 print('Only xyz files for now') 476 return atoms
Read molecular coordinates from various file formats.
Currently supports XYZ format files. This method serves as a dispatcher to format-specific reading methods based on file extension.
Args: filename (str): Path to the coordinate file. File format is determined by the file extension.
Returns: list: List of atoms in the format [[symbol, x, y, z], ...] where symbol is the atomic symbol and x, y, z are coordinates in Angstroms.
Supported Formats: - .xyz: Standard XYZ coordinate format
Future Extensions: - .mol: MOL file format - coord: TURBOMOLE coordinate format - Other quantum chemistry input formats
Raises: Prints error message if file format is not supported.
478 def readXYZ(self, filename): 479 """ 480 Read molecular geometry from an XYZ format file. 481 482 Parses a standard XYZ file and extracts atomic symbols and coordinates. 483 The XYZ format consists of: 484 - Line 1: Number of atoms 485 - Line 2: Comment line (ignored) 486 - Lines 3+: atomic_symbol x_coordinate y_coordinate z_coordinate 487 488 Args: 489 filename (str): Path to the XYZ file to read. 490 491 Returns: 492 list: List of atoms in format [[symbol, x, y, z], ...] where: 493 - symbol (str): Atomic symbol (e.g., 'H', 'C', 'O') 494 - x, y, z (float): Coordinates in Angstroms 495 496 File Format Expected: 497 number_of_atoms 498 comment_line 499 element_symbol x_coord y_coord z_coord 500 element_symbol x_coord y_coord z_coord 501 ... 502 503 Error Handling: 504 - Prints error if the number of atoms read doesn't match the declared count 505 - Assumes coordinates are separated by whitespace 506 - Strips whitespace from all input 507 508 Example: 509 For a file 'water.xyz': 510 3 511 Water molecule 512 O 0.000000 0.000000 0.000000 513 H 0.000000 0.000000 0.970000 514 H 0.944863 0.000000 -0.242498 515 516 Returns: [['O', 0.0, 0.0, 0.0], ['H', 0.0, 0.0, 0.97], ['H', 0.944863, 0.0, -0.242498]] 517 """ 518 with open(filename, "r") as f: 519 lines = f.readlines() 520 521 if len(lines) < 2: 522 raise ValueError(f"File {filename} does not contain enough lines for XYZ format") 523 524 # Read number of atoms from first line 525 try: 526 natoms = int(lines[0].strip()) 527 except ValueError: 528 raise ValueError(f"First line of {filename} must contain an integer atom count") 529 530 # The second line is the comment (can be blank) 531 comment_line = lines[1].rstrip("\n") 532 533 atoms = [] 534 for i, line in enumerate(lines[2:2 + natoms], start=3): 535 parts = line.split() 536 if len(parts) < 4: 537 raise ValueError(f"Line {i} in {filename} does not have 4 columns: {line.strip()}") 538 symbol = parts[0] 539 try: 540 x, y, z = map(float, parts[1:4]) 541 except ValueError: 542 raise ValueError(f"Invalid coordinates on line {i} in {filename}: {parts[1:4]}") 543 atoms.append([symbol, x, y, z]) 544 545 if len(atoms) != natoms: 546 raise ValueError( 547 f"Expected {natoms} atoms, but found {len(atoms)} in {filename}" 548 ) 549 550 return atoms
Read molecular geometry from an XYZ format file.
Parses a standard XYZ file and extracts atomic symbols and coordinates. The XYZ format consists of:
- Line 1: Number of atoms
- Line 2: Comment line (ignored)
- Lines 3+: atomic_symbol x_coordinate y_coordinate z_coordinate
Args: filename (str): Path to the XYZ file to read.
Returns: list: List of atoms in format [[symbol, x, y, z], ...] where: - symbol (str): Atomic symbol (e.g., 'H', 'C', 'O') - x, y, z (float): Coordinates in Angstroms
File Format Expected: number_of_atoms comment_line element_symbol x_coord y_coord z_coord element_symbol x_coord y_coord z_coord ...
Error Handling: - Prints error if the number of atoms read doesn't match the declared count - Assumes coordinates are separated by whitespace - Strips whitespace from all input
Example: For a file 'water.xyz': 3 Water molecule O 0.000000 0.000000 0.000000 H 0.000000 0.000000 0.970000 H 0.944863 0.000000 -0.242498
Returns: [['O', 0.0, 0.0, 0.0], ['H', 0.0, 0.0, 0.97], ['H', 0.944863, 0.0, -0.242498]]