diff --git a/.gitignore b/.gitignore index b6e86d7..47d7205 100644 --- a/.gitignore +++ b/.gitignore @@ -167,3 +167,6 @@ cython_debug/ # VASP files *POTCAR* + +# API keys +*.key diff --git a/README.md b/README.md index ac3e72d..aad467a 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,9 @@ Therefore, the development of tools and environments for the rapid generation of Cif2x is a tool to generate input files for first-principles calculation software. It takes crystal structure data in CIF format and input parameters as a template, and constructs the parts that vary depending on the type of material and conditions. It is capable of generating multiple input files tailored to specific computational conditions. Currently, it supports [VASP](https://www.vasp.at), [Quantum ESPRESSO](https://www.quantum-espresso.org), [OpenMX](http://www.openmx-square.org), and [AkaiKKR](http://kkr.issp.u-tokyo.ac.jp). +getcif is a tool to retrieve crystallographic information and other properties of materials from databases. The latest version of getcif provides access to Materials Project database. Users can search database and obtain information by specifying symmetry, composition, or physical properties of materials. + + ## Target applications Quantum ESPRESSO, VASP, OpenMX, and AkaiKKR. @@ -14,6 +17,8 @@ Quantum ESPRESSO, VASP, OpenMX, and AkaiKKR. Python3 with pymatgen, qe-tools, AkaiKKRPythonUtil, and other library packages. +getcif requires mp-api, pymatgen, and other library packages. 
+ ## Install - From source diff --git a/docs/en/source/_static/.placeholder b/docs/en/source/_static/.placeholder new file mode 100644 index 0000000..e69de29 diff --git a/docs/en/source/cif2x/basic-usage.rst b/docs/en/source/cif2x/basic-usage.rst index 9aabaf6..b30186f 100644 --- a/docs/en/source/cif2x/basic-usage.rst +++ b/docs/en/source/cif2x/basic-usage.rst @@ -16,6 +16,11 @@ Installation and basic usage - OpenBabel module (optional) - AkaiKKRPythonUtil module + For ``getcif``, a tool to retrieve crystallographic data from databases, an additional library is required: + + - mp-api module + + **Official pages** - `GitHub repository `_ @@ -37,7 +42,7 @@ Installation and basic usage $ cd ./cif2x $ python3 -m pip install . - The executable file ``cif2x`` will be installed. + The executable files ``cif2x`` and ``getcif`` will be installed. You may need to add ``--user`` option next to ``install`` keyword above in case you are not allowed to install packages system-wide. AkaiKKRPythonUtil module need to be installed separately. The source package is available from `the repository `_. Then follow the steps below to install the module along with the required seaborn module: @@ -86,6 +91,9 @@ Installation and basic usage | | |-- read_input.py | | |-- run_cif2kkr.py | |-- utils.py + | |-- getcif/ + | |-- __init__.py + | |-- main.py |-- sample/ diff --git a/docs/en/source/cif2x/index.rst b/docs/en/source/cif2x/index.rst index 24694f7..b43528b 100644 --- a/docs/en/source/cif2x/index.rst +++ b/docs/en/source/cif2x/index.rst @@ -13,3 +13,4 @@ Input file generator for first-principles calculations (cif2x) command/index filespec/index appendix/index + ../getcif/index diff --git a/docs/en/source/conf.py b/docs/en/source/conf.py index 721ef2b..59edbe6 100644 --- a/docs/en/source/conf.py +++ b/docs/en/source/conf.py @@ -26,9 +26,9 @@ # built documents. # # The short X.Y version. 
-version = '1.0' +version = '1.1' # The full version, including alpha/beta/rc tags -release = '1.0.1' +release = '1.1.0' # -- General configuration --------------------------------------------------- diff --git a/docs/en/source/getcif/about/index.rst b/docs/en/source/getcif/about/index.rst new file mode 100644 index 0000000..1f6249f --- /dev/null +++ b/docs/en/source/getcif/about/index.rst @@ -0,0 +1,5 @@ +**************************************************************** +Introduction +**************************************************************** + +``getcif`` is a tool to retrieve crystallographic information and other properties of materials from databases. The latest version of getcif provides access to Materials Project database. Users can search database and obtain information by specifying symmetry, composition, or physical properties of materials. diff --git a/docs/en/source/getcif/appendix/index.rst b/docs/en/source/getcif/appendix/index.rst new file mode 100644 index 0000000..7959517 --- /dev/null +++ b/docs/en/source/getcif/appendix/index.rst @@ -0,0 +1,320 @@ +================================================================ +Parameter List +================================================================ + +Search conditions (properties) +---------------------------------------------------------------- + +Table :ref:`getcif-cond-table` summarizes condition terms available in the properties section. + +``getcif`` uses the ``mp-api`` library provided by Materials Project as a client for accessing the database via Materials Project API. The condition terms correspond to the parameters for the ``materials.summary.search`` method of MPRester class in ``mp-api``. (The content of the table is taken and reformatted from the comments of the source file in ``mp-api``.) 
+ +The types of the parameter values are denoted as follows: + +- ``str``: a string +- ``List[str]``: a list of strings +- ``str | List[str]``: a string or a list of strings +- ``int``: an integer +- ``bool``: a boolean value (``true`` or ``false``) +- ``Tuple[float,float]``: a pair of two floating point numbers (as a list) +- ``Tuple[int,int]``: a pair of two integers (as a list) +- ``CrystalSystem``: a string representing the crystal system, one of the following: Triclinic, Monoclinic, Orthorhombic, Tetragonal, Trigonal, Hexagonal, Cubic +- ``List[HasProps]``: a list of strings representing the properties defined in ``emmet.core.summary``. The available terms include: + + absorption, + bandstructure, + charge_density, + chemenv, + dielectric, + dos, + elasticity, + electronic_structure, + eos, + grain_boundaries, + insertion_electrodes, + magnetism, + materials, + oxi_states, + phonon, + piezoelectric, + provenance, + substrates, + surface_properties, + thermo, + xas + +- ``Ordering``: a string representing the magnetic ordering, one of the following: FM, AFM, FiM, NM + +A list of the values is described in an indented style or in a comma-separated bracketed style in YAML notation. It may also be written as a space-separated list. + +A ``Tuple`` is used to denote a range of values by ``min`` and ``max``. It is described by a list of two numbers, as well as by a space-separated list as ``min max``. +The following notation is also available: + + ``< max`` + less than or equal to ``max`` + + ``> min`` + more than or equal to ``min`` + + ``min ~ max`` + between ``min`` and ``max`` + + +.. _getcif-cond-table: + +.. list-table:: Search criteria + :widths: 30 20 60 + :header-rows: 1 + + * - Keyword + - Type + - Description + * - band_gap + - Tuple[float,float] + - Minimum and maximum band gap in eV to consider. 
+ * - chemsys + - str | List[str] + - A chemical system, list of chemical systems (e.g., Li-Fe-O, Si-\*, [Si-O, Li-Fe-P]), or single formula (e.g., Fe2O3, Si\*). + * - crystal_system + - CrystalSystem + - Crystal system of material. + * - density + - Tuple[float,float] + - Minimum and maximum density to consider. + * - deprecated + - bool + - Whether the material is tagged as deprecated. + * - e_electronic + - Tuple[float,float] + - Minimum and maximum electronic dielectric constant to consider. + * - e_ionic + - Tuple[float,float] + - Minimum and maximum ionic dielectric constant to consider. + * - e_total + - Tuple[float,float] + - Minimum and maximum total dielectric constant to consider. + * - efermi + - Tuple[float,float] + - Minimum and maximum fermi energy in eV to consider. + * - elastic_anisotropy + - Tuple[float,float] + - Minimum and maximum value to consider for the elastic anisotropy. + * - elements + - List[str] + - A list of elements. + * - energy_above_hull + - Tuple[int,int] + - Minimum and maximum energy above the hull in eV/atom to consider. + * - equilibrium_reaction_energy + - Tuple[float,float] + - Minimum and maximum equilibrium reaction energy in eV/atom to consider. + * - exclude_elements + - List[str] + - List of elements to exclude. + * - formation_energy + - Tuple[int,int] + - Minimum and maximum formation energy in eV/atom to consider. + * - formula + - str | List[str] + - A formula including anonymized formula or wild cards (e.g., Fe2O3, ABO3, Si\*). A list of chemical formulas can also be passed (e.g., [Fe2O3, ABO3]). + * - g_reuss + - Tuple[float,float] + - Minimum and maximum value in GPa to consider for the Reuss average of the shear modulus. + * - g_voigt + - Tuple[float,float] + - Minimum and maximum value in GPa to consider for the Voigt average of the shear modulus. + * - g_vrh + - Tuple[float,float] + - Minimum and maximum value in GPa to consider for the Voigt-Reuss-Hill average of the shear modulus. 
+ * - has_props + - List[HasProps] + - The calculated properties available for the material. + * - has_reconstructed + - bool + - Whether the entry has any reconstructed surfaces. + * - is_gap_direct + - bool + - Whether the material has a direct band gap. + * - is_metal + - bool + - Whether the material is considered a metal. + * - is_stable + - bool + - Whether the material lies on the convex energy hull. + * - k_reuss + - Tuple[float,float] + - Minimum and maximum value in GPa to consider for the Reuss average of the bulk modulus. + * - k_voigt + - Tuple[float,float] + - Minimum and maximum value in GPa to consider for the Voigt average of the bulk modulus. + * - k_vrh + - Tuple[float,float] + - Minimum and maximum value in GPa to consider for the Voigt-Reuss-Hill average of the bulk modulus. + * - magnetic_ordering + - Ordering + - Magnetic ordering of the material. + * - material_ids + - List[str] + - List of Materials Project IDs to return data for. + * - n + - Tuple[float,float] + - Minimum and maximum refractive index to consider. + * - num_elements + - Tuple[int,int] + - Minimum and maximum number of elements to consider. + * - num_sites + - Tuple[int,int] + - Minimum and maximum number of sites to consider. + * - num_magnetic_sites + - Tuple[int,int] + - Minimum and maximum number of magnetic sites to consider. + * - num_unique_magnetic_sites + - Tuple[int,int] + - Minimum and maximum number of unique magnetic sites to consider. + * - piezoelectric_modulus + - Tuple[float,float] + - Minimum and maximum piezoelectric modulus to consider. + * - poisson_ratio + - Tuple[float,float] + - Minimum and maximum value to consider for Poisson's ratio. + * - possible_species + - List[str] + - List of element symbols appended with oxidation states. (e.g. Cr2+,O2-) + * - shape_factor + - Tuple[float,float] + - Minimum and maximum shape factor values to consider. + * - spacegroup_number + - int + - Space group number of material. 
+ * - spacegroup_symbol + - str + - Space group symbol of the material in international short symbol notation. + * - surface_energy_anisotropy + - Tuple[float,float] + - Minimum and maximum surface energy anisotropy values to consider. + * - theoretical + - bool + - Whether the material is theoretical. + * - total_energy + - Tuple[int,int] + - Minimum and maximum corrected total energy in eV/atom to consider. + * - total_magnetization + - Tuple[float,float] + - Minimum and maximum total magnetization values to consider. + * - total_magnetization_normalized_formula_units + - Tuple[float,float] + - Minimum and maximum total magnetization values normalized by formula units to consider. + * - total_magnetization_normalized_vol + - Tuple[float,float] + - Minimum and maximum total magnetization values normalized by volume to consider. + * - uncorrected_energy + - Tuple[int,int] + - Minimum and maximum uncorrected total energy in eV/atom to consider. + * - volume + - Tuple[float,float] + - Minimum and maximum volume to consider. + * - weighted_surface_energy + - Tuple[float,float] + - Minimum and maximum weighted surface energy in J/:math:`m^2` to consider. + * - weighted_work_function + - Tuple[float,float] + - Minimum and maximum weighted work function in eV to consider. + +.. +.. .. list-table:: Unsupported search criteria for the properties section +.. :widths: 30 20 60 +.. :header-rows: 1 +.. +.. * - Keyword +.. - Type +.. - Description +.. * - num_chunks +.. - int +.. - Maximum number of chunks of data to yield. None will yield all possible. +.. * - chunk_size +.. - int +.. - Number of data entries per chunk. +.. * - all_fields +.. - bool +.. - Whether to return all fields in the document. Defaults to True. +.. * - fields +.. - List[str] +.. - List of fields in SearchDoc to return data for. Default is material_id if all_fields is False. +.. 
+ +Data to retrieve (fields) +---------------------------------------------------------------- + +The items available for the ``fields`` section for retrieving from the database are listed below. + +.. code:: text + + band_gap + bandstructure + builder_meta + bulk_modulus + cbm + chemsys + composition + composition_reduced + database_IDs + decomposes_to + density + density_atomic + deprecated + deprecation_reasons + dos + dos_energy_down + dos_energy_up + e_electronic + e_ij_max + e_ionic + e_total + efermi + elements + energy_above_hull + energy_per_atom + equilibrium_reaction_energy_per_atom + es_source_calc_id + formation_energy_per_atom + formula_anonymous + formula_pretty + grain_boundaries + has_props + has_reconstructed + homogeneous_poisson + is_gap_direct + is_magnetic + is_metal + is_stable + last_updated + material_id + n + nelements + nsites + num_magnetic_sites + num_unique_magnetic_sites + ordering + origins + possible_species + property_name + shape_factor + shear_modulus + structure + surface_anisotropy + symmetry + task_ids + theoretical + total_magnetization + total_magnetization_normalized_formula_units + total_magnetization_normalized_vol + types_of_magnetic_species + uncorrected_energy_per_atom + universal_anisotropy + vbm + volume + warnings + weighted_surface_energy + weighted_surface_energy_EV_PER_ANG2 + weighted_work_function + xas diff --git a/docs/en/source/getcif/command/index.rst b/docs/en/source/getcif/command/index.rst new file mode 100644 index 0000000..48ee973 --- /dev/null +++ b/docs/en/source/getcif/command/index.rst @@ -0,0 +1,44 @@ +Command reference +================================================================ + +getcif +---------------------------------------------------------------- + + Retrieve crystallographic and other data from databases. + +SYNOPSIS: + + .. 
code-block:: bash + + getcif [-v][-q] [--dry-run] input_yaml + getcif -h + getcif --version + +DESCRIPTION: + + This program reads an input parameter file specified by ``input_yaml``, and connects to the database to submit a query and obtain the crystallographic data of materials. + It takes the following command line options. + + - ``input_yaml`` + + specifies an input parameter file in YAML format. + + - ``-v`` + + increases verbosity of the runtime messages. When specified multiple times, the program becomes more verbose. + + - ``-q`` + + decreases verbosity of the runtime messages. It cancels the effect of ``-v`` option, and when specified multiple times, the program becomes more quiet. + + - ``--dry-run`` + + displays search parameters and exits without connecting to the database. It allows to confirm the search conditions. This option supersedes the ``dry_run`` parameter in the input file. + + - ``-h`` + + displays help and exits. + + - ``--version`` + + displays version information. diff --git a/docs/en/source/getcif/filespec/index.rst b/docs/en/source/getcif/filespec/index.rst new file mode 100644 index 0000000..4ae408b --- /dev/null +++ b/docs/en/source/getcif/filespec/index.rst @@ -0,0 +1,137 @@ +.. _sec-getcif-fileformat: + +================================ +File format +================================ + +Input parameter file +================================ + +An input parameter file describes information to search for crystallographic and other data from Materials Project database by getcif. It should be given in YAML format, and consist of the following sections. + + #. database section: describes information on the database to connect. + + #. option section: describes output directory and other parameters for command execution. + + #. properties section: describes search conditions. + + #. fields section: describes types of data to be retrieved. 
+ + +database +-------------------------------- + +``target`` + + This parameter specifies the database to connect to. At present this parameter is ignored. + +``api_key_file`` (default value: ``materials_project.key``) + + This parameter specifies the name of a file that contains the API key for accessing the database. + The suffix of the file name must be ``.key``. + If the file does not exist or it does not contain a valid value, the API key is obtained from the environment variable ``MP_API_KEY``, or from the parameter ``PMG_MAPI_KEY`` of the pymatgen configuration file in ``~/.config/.pmgrc``. + + The API key file is a text file. A line starting with ``#`` is regarded as a comment. Leading and trailing spaces are ignored. When the file contains more than one line, the API key is taken from the first valid line. + + +option +-------------------------------- + +This section contains settings for the command execution, such as the output directory. The available parameters are described below. + +``output_dir`` (default value: ``""``) + + This parameter specifies the directory name to store the data. The retrieved data are placed in this directory under the subdirectories by the material ID for each material. The default value is the current directory. + +``dry_run`` (default value: ``False``) + + When this parameter is set to True, getcif prints the search conditions and exits without connecting to the database. It is useful to check the content of the query. + +``symprec`` (default value: 0.1) + + This parameter specifies the tolerance in calculating the symmetry of a crystal structure when the structure data are written to a CIF file. By default, 0.1 is specified. When ``symprec`` is set to 0.0, it is treated as if ``symprec`` is unspecified, in which case a CIF file is generated without considering symmetry. + + ``symprec`` is a parameter that specifies the tolerance used to determine the symmetry of a crystal structure. 
When calculating the symmetry of a crystal structure, it is essential to consider the slight displacements of atomic positions and the precision of numerical calculations. ``symprec`` controls the allowable range of these displacements and serves as a threshold for deciding whether a symmetry operation should be applied. + + If ``symprec`` is set to a smaller value (e.g., 0.01), the symmetry determination becomes more stringent, and even minor displacements in the crystal structure may prevent the application of symmetry operations. This can result in the identification of a lower-symmetry space group. Conversely, if ``symprec`` is set to a larger value (e.g., 1.0), the symmetry determination is more lenient, allowing small displacements to be ignored, which may lead to the recognition of a higher-symmetry space group. + + When the ``symmetry`` field is specified in the fields section, the symmetry information determined using the default ``symprec=0.1`` in the Materials Project is obtained and written to a text file (``symmetry``). + + +properties +-------------------------------- + +This section defines the search conditions. +The conditions such as the element types, the crystal symmetry, or the values of physical properties are specified in the ``keyword: value`` format. They are treated as AND conditions. +The available terms, based on the Materials Project API, conform to the parameters of +the ``materials.summary.search`` method in the mp-api library. The list of terms is summarized in the Appendix, and can be seen by ``getcif --help``. + +The format of the parameter values is shown below. It follows the YAML specification with several extensions for brevity. + +- a number, a string + + written as-is. + +- a boolean value + + written as ``true`` or ``false``. + +- a list of numbers or strings + + written in the indented style (block style) or as a comma-separated list enclosed in brackets (flow style) in YAML notation. 
+ It may also be written as a space-separated list, for example: + + .. code:: yaml + + elements: Sr Ti + +- a range of numerical values + + described as a list of two numbers such as ``[ min, max ]``, or a pair of two numbers separated by a space as ``min max``. The following formats are also available. + + ``<= max`` + less than or equal to ``max``. + + ``< max`` + less than ``max``. (For a real number, it is equivalent to ``<= max``. For an integer, it is treated as ``<= max-1``.) + + ``>= min`` + greater than or equal to ``min``. + + ``> min`` + greater than ``min``. (For a real number, it is equivalent to ``>= min``. For an integer, it is treated as ``>= min+1``.) + + ``min ~ max`` + between ``min`` and ``max``. + + N.B.: + + - A space must be placed between the symbol and the number. + + - Due to the YAML syntax that the symbol ``">"`` at the beginning of a term is treated as a special character, ``> min`` and ``>= min`` should be enclosed by quotes as ``"> min"`` and ``">= min"``, respectively. + + - In list notations, ``<= max`` and ``>= min`` are denoted as ``[ None, max ]`` and ``[ min, None ]``, respectively. + + +- wild card symbols + + The term ``formula`` accepts wild card symbols ``*`` for elements. In this case, the whole value is enclosed by ``" "``. For example, + + .. code:: yaml + + formula: "**O3" + + for :math:`ABO_3`-type materials. + + +fields +-------------------------------- + +This section defines the types of data to be retrieved. +A list of types is described in the YAML format, or as a space-separated string. In the latter format, it can be given in multiple-line format using the "|" notation of YAML. + +The available types of data conform to the ``fields`` parameter of the Materials Project API. They are listed in the Appendix, and can be viewed by ``getcif --help``. + +The types ``material_id`` and ``formula_pretty`` are retrieved automatically. 
+ +The obtained data are placed in the directory specified by ``output_dir`` parameter under the subdirectories of the material_id for each material. Each item is stored as a separate file of the item name. The crystal structure data (``structure``) is stored in a file ``structure.cif`` in CIF format. diff --git a/docs/en/source/getcif/index.rst b/docs/en/source/getcif/index.rst new file mode 100644 index 0000000..30bd063 --- /dev/null +++ b/docs/en/source/getcif/index.rst @@ -0,0 +1,14 @@ +**************************************************************** +A tool to retrieve crystallographic data from databases (getcif) +**************************************************************** + + +.. toctree:: + :maxdepth: 2 + :numbered: 2 + + about/index + tutorial/index + command/index + filespec/index + appendix/index diff --git a/docs/en/source/getcif/tutorial/index.rst b/docs/en/source/getcif/tutorial/index.rst new file mode 100644 index 0000000..b595f21 --- /dev/null +++ b/docs/en/source/getcif/tutorial/index.rst @@ -0,0 +1,114 @@ +.. _sec-getcif-tutorial: + +Tutorial +================================================================ + +In this tutorial, the procedure to use the database query tool ``getcif`` is described for searching and obtaining crystallographic information from databases for materials science. +It consists of getting an API key, preparing an input parameter file, and running the getcif program. +We will explain the steps along an example of searching and obtaining information for ABO3-type materials provided in the ``docs/tutorial/getcif`` directory. + +Getting an API key +---------------------------------------------------------------- + +In order to access the Materials Project database via API, users need to register with the Materials Project and obtain an API key. +Visit the Materials Project website `https://next-gen.materialsproject.org <https://next-gen.materialsproject.org>`_, create an account, and log in. 
An API key is automatically generated on registration and shown in the user dashboard. The API key should be kept safe and not shared with others. + +The API key is made available to getcif in one of the following ways: + + (a) storing in the pymatgen configuration file by typing in as follows: + + .. code:: bash + + $ pmg config --add PMG_MAPI_KEY <API_KEY> + + or editing the file ``~/.config/.pmgrc`` to include the following: + + .. code:: bash + + PMG_MAPI_KEY: <API_KEY> + + (b) setting to an environment variable by: + + .. code:: bash + + $ MP_API_KEY="<API_KEY>" + $ export MP_API_KEY + + (c) storing the API key to a file located in the directory where getcif is run. + The default value of the file name is ``materials_project.key``. Otherwise, it is given in the input parameter file. The file name must end with ``.key``. + + .. code:: yaml + + database: + api_key_file: materials_project.key + + Note: it is recommended to exclude files with the ``.key`` suffix from the version control system (e.g., for Git, add ``*.key`` to the ``.gitignore`` file). + + +Prepare an input parameter file +---------------------------------------------------------------- + +An input parameter file describes search conditions and data items to retrieve from databases. + +An example is presented below. It is a text file in YAML format that contains information for accessing the database, search conditions, and types of data to obtain. +See the :ref:`file format <sec-getcif-fileformat>` section for the details of specification. + +In YAML format, parameters are given in dictionary form as ``keyword: value``, where ``value`` is a scalar such as a number or a string, or a set of values enclosed in ``[ ]`` or listed in itemized form, or a nested dictionary. +For the search conditions and data fields, a list may be given as space-separated items without brackets as a special notation. + +.. 
literalinclude:: ../../../../tutorial/getcif/input.yaml + :language: yaml + +The input parameter file consists of ``database``, ``option``, ``properties``, and ``fields`` sections. +The ``database`` section describes settings about connecting to databases. +In the example, ``target`` is set to Materials Project, though this term is not considered at present. ``api_key_file`` can be used to specify a file that contains the API key. The key may also be set in the pymatgen configuration file or in the environment variable. The latter is assumed in the tutorial. + +The ``option`` section describes optional settings for the command execution. +``output_dir`` specifies the directory to place the obtained data. The default is the current directory. If ``dry_run`` is set to ``true``, getcif does not connect to the database; instead, it just prints the search conditions and exits. ``dry_run`` may also be specified by the command-line option ``--dry-run``. + +The ``properties`` section describes search conditions. They are given in the form of ``keyword: value`` and treated as AND conditions. +In the example, the search condition is specified to find materials with band gap less than or equal to 1.0, stable insulator, having composition formula of ABO3 (where A and B are arbitrary species), that belong to the space group ``Pm-3m`` (perovskite). +The ``band_gap`` takes a pair of values for the lower and upper limits, as well as the description such as ``< 1.0``. +The available terms for specifying search conditions are listed in the Appendix. + +The ``fields`` section describes the data items to obtain. It is given as a YAML list, or a space-separated list. +``structure`` specifies the crystal structure data that will be stored in CIF format. +``band_gap`` specifies the value of band gap, and ``symmetry`` specifies the information on the symmetry. ``material_id`` that refers to the index of material data in the Materials Project, and ``formula_pretty`` that refers to the composition formula are automatically obtained. 
+The available items are listed in the Appendix, or can be found in the help message of getcif command. + +Obtaining data +---------------------------------------------------------------- + +The program ``getcif`` is executed with the input parameter file (``input.yaml``) as follows. + +.. code-block:: bash + + $ getcif input.yaml + +Then it connects to the Materials Project database, and obtains the data that match the specified conditions. +The summary including the material IDs, the composition formulas, and other data items is printed to the standard output as follows. + +.. literalinclude:: ../../../../tutorial/getcif/output_log.txt + :language: text + +The obtained data are placed in the directory specified by ``output_dir`` with the subdirectories of the material ID for each material. +In this example, seven subdirectories with names from mp-3163 to mp-977455 are created within ``result`` directory, and each subdirectory contains the following files: + + - band_gap + the value of band gap + + - formula + the composition formula (that corresponds to the field ``formula_pretty``) + + - structure.cif + the crystal structure data in CIF format + + - symmetry + the information about symmetry + +If an option ``--dry-run`` is added as a command-line option to ``getcif``, +the program prints the search condition as follows, and exits. +It will be useful for checking the search parameters. + +.. 
literalinclude:: ../../../../tutorial/getcif/output_dryrun.txt + :language: text diff --git a/docs/ja/source/_static/.placeholder b/docs/ja/source/_static/.placeholder new file mode 100644 index 0000000..e69de29 diff --git a/docs/ja/source/cif2x/about/index.rst b/docs/ja/source/cif2x/about/index.rst index 96c9bbf..c5c9025 100644 --- a/docs/ja/source/cif2x/about/index.rst +++ b/docs/ja/source/cif2x/about/index.rst @@ -16,6 +16,8 @@ cif2xは、cifファイルから第一原理計算用の入力ファイルを生 現在は、 `VASP `_, `Quantum ESPRESSO `_, `OpenMX `_, `AkaiKKR `_ に対応しています。 +付属プログラムとして、物質材料データベースから結晶構造データ等を取得するツール getcif を用意しています。現在は Materials Project からのデータ取得に対応しています。物質の組成や対称性、バンドギャップなどの物性値をもとにデータベースを検索し、データを取得することができます。 + ライセンス ---------------------------------------------------------------- @@ -53,6 +55,9 @@ cif2xは、cifファイルから第一原理計算用の入力ファイルを生 バージョン履歴 ---------------------------------------------------------------- + ver.1.1.0 + 2024/09/09 リリース + ver.1.0.1 2024/03/31 リリース diff --git a/docs/ja/source/cif2x/basic-usage.rst b/docs/ja/source/cif2x/basic-usage.rst index 06796ba..79b7b90 100644 --- a/docs/ja/source/cif2x/basic-usage.rst +++ b/docs/ja/source/cif2x/basic-usage.rst @@ -16,6 +16,10 @@ - OpenBabel モジュール (オプション) - AkaiKKRPythonUtil モジュール + CIFデータ取得ツール getcif を利用するには、上記に加えて以下のライブラリが必要です。 + + - mp-api モジュール + **ソースコード配布サイト** - `GitHubリポジトリ `_ @@ -37,7 +41,7 @@ $ cd ./cif2x $ python3 -m pip install . 
- 実行プログラム ``cif2x`` がインストールされます。 + 実行プログラム ``cif2x``, ``getcif`` がインストールされます。 なお、AkaiKKRPythonUtil モジュールは別途インストールが必要です。 以下の手順で `配布サイト `_ からソースコードを取得しインストールします。また、必要な seaborn モジュールもインストールしておきます。 @@ -85,6 +89,9 @@ | | |-- read_input.py | | |-- run_cif2kkr.py | |-- utils.py + | |-- getcif/ + | |-- __init__.py + | |-- main.py |-- sample/ diff --git a/docs/ja/source/cif2x/index.rst b/docs/ja/source/cif2x/index.rst index 82d4180..008f70a 100644 --- a/docs/ja/source/cif2x/index.rst +++ b/docs/ja/source/cif2x/index.rst @@ -13,3 +13,4 @@ command/index filespec/index appendix/index + ../getcif/index diff --git a/docs/ja/source/conf.py b/docs/ja/source/conf.py index b02006a..c3ea059 100644 --- a/docs/ja/source/conf.py +++ b/docs/ja/source/conf.py @@ -26,9 +26,9 @@ # built documents. # # The short X.Y version. -version = '1.0' +version = '1.1' # The full version, including alpha/beta/rc tags -release = '1.0.1' +release = '1.1.0' # -- General configuration --------------------------------------------------- diff --git a/docs/ja/source/getcif/about/index.rst b/docs/ja/source/getcif/about/index.rst new file mode 100644 index 0000000..49a517f --- /dev/null +++ b/docs/ja/source/getcif/about/index.rst @@ -0,0 +1,5 @@ +**************************************************************** +概要 +**************************************************************** + +getcifは物質材料データベースから結晶構造データ等を取得するツールです。現在は Materials Project からのデータ取得に対応しています。物質の組成や対称性、バンドギャップなどの物性値をもとにデータベースを検索し、データを取得することができます。 diff --git a/docs/ja/source/getcif/appendix/index.rst b/docs/ja/source/getcif/appendix/index.rst new file mode 100644 index 0000000..fc235ea --- /dev/null +++ b/docs/ja/source/getcif/appendix/index.rst @@ -0,0 +1,322 @@ +================================================================ +パラメータリスト +================================================================ + +検索条件 (properties) +---------------------------------------------------------------- + +properties に指定できる項目と、その項目がどのような値を取るかを以下にまとめます。 + 
+Materials Project API のクライアントアプリケーションの一つとして mp-api パッケージが Materials Project から公開されており、getcif はこのライブラリを利用してデータベースへの接続を行います。 +以下は MPRester クラスの materials.summary.search メソッドのパラメータに対応します。 +(以下の表は materials.summary.search のソースコードのコメントから転記しました。) + +値の型の表記は次のとおりです。 + +- ``str``: 文字列型 +- ``List[str]``: 文字列型のリスト +- ``str | List[str]``: 単一の文字列、または、文字列型のリスト +- ``int``: 整数型 +- ``bool``: 真偽値 (true または false) +- ``Tuple[float,float]``: 実数値 2つからなる組 (リスト) +- ``Tuple[int,int]``: 整数値 2つからなる組 (リスト) +- ``CrystalSystem``: 結晶のタイプを表す文字列。Triclinic, Monoclinic, Orthorhombic, Tetragonal, Trigonal, Hexagonal, Cubic のいずれか。 +- ``List[HasProps]``: 特性値のタイプを表す文字列のリスト。特性値は emmet.core.summary に定義されている。以下のいずれかの値を取る。 + + absorption, + bandstructure, + charge_density, + chemenv, + dielectric, + dos, + elasticity, + electronic_structure, + eos, + grain_boundaries, + insertion_electrodes, + magnetism, + materials, + oxi_states, + phonon, + piezoelectric, + provenance, + substrates, + surface_properties, + thermo, + xas + +- ``Ordering``: 磁気秩序を表す文字列。FM, AFM, FiM, NM のいずれか。 + +値のリストは、YAML形式の箇条書きおよび ``[ ... ]`` にカンマ区切りで記述するほか、空白区切りで列挙する記法も可能です。 + +``Tuple`` で表される型は値の範囲 (min, max) の指定に使われます。値のリストとして記述するほか、空白区切りで ``min max`` のように記述することもできます。また、以下の表記も可能です。 + + ``<= max`` : max 以下 + + ``< max`` : max より小さい + + ``>= min`` : min 以上 + + ``> min`` : min より大きい + + ``min ~ max`` : min 以上 max 以下 + +.. _getcif-cond-table: + +.. list-table:: 検索条件のキーワード + :widths: 30 20 60 + :header-rows: 1 + + * - Keyword + - Type + - Description + * - band_gap + - Tuple[float,float] + - Minimum and maximum band gap in eV to consider. + * - chemsys + - str | List[str] + - A chemical system, list of chemical systems (e.g., Li-Fe-O, Si-\*, [Si-O, Li-Fe-P]), or single formula (e.g., Fe2O3, Si\*). + * - crystal_system + - CrystalSystem + - Crystal system of material. + * - density + - Tuple[float,float] + - Minimum and maximum density to consider. + * - deprecated + - bool + - Whether the material is tagged as deprecated. 
+ * - e_electronic + - Tuple[float,float] + - Minimum and maximum electronic dielectric constant to consider. + * - e_ionic + - Tuple[float,float] + - Minimum and maximum ionic dielectric constant to consider. + * - e_total + - Tuple[float,float] + - Minimum and maximum total dielectric constant to consider. + * - efermi + - Tuple[float,float] + - Minimum and maximum fermi energy in eV to consider. + * - elastic_anisotropy + - Tuple[float,float] + - Minimum and maximum value to consider for the elastic anisotropy. + * - elements + - List[str] + - A list of elements. + * - energy_above_hull + - Tuple[int,int] + - Minimum and maximum energy above the hull in eV/atom to consider. + * - equilibrium_reaction_energy + - Tuple[float,float] + - Minimum and maximum equilibrium reaction energy in eV/atom to consider. + * - exclude_elements + - List[str] + - List of elements to exclude. + * - formation_energy + - Tuple[int,int] + - Minimum and maximum formation energy in eV/atom to consider. + * - formula + - str | List[str] + - A formula including anonymized formula or wild cards (e.g., Fe2O3, ABO3, Si\*). A list of chemical formulas can also be passed (e.g., [Fe2O3, ABO3]). + * - g_reuss + - Tuple[float,float] + - Minimum and maximum value in GPa to consider for the Reuss average of the shear modulus. + * - g_voigt + - Tuple[float,float] + - Minimum and maximum value in GPa to consider for the Voigt average of the shear modulus. + * - g_vrh + - Tuple[float,float] + - Minimum and maximum value in GPa to consider for the Voigt-Reuss-Hill average of the shear modulus. + * - has_props + - List[HasProps] + - The calculated properties available for the material. + * - has_reconstructed + - bool + - Whether the entry has any reconstructed surfaces. + * - is_gap_direct + - bool + - Whether the material has a direct band gap. + * - is_metal + - bool + - Whether the material is considered a metal. + * - is_stable + - bool + - Whether the material lies on the convex energy hull. 
+ * - k_reuss + - Tuple[float,float] + - Minimum and maximum value in GPa to consider for the Reuss average of the bulk modulus. + * - k_voigt + - Tuple[float,float] + - Minimum and maximum value in GPa to consider for the Voigt average of the bulk modulus. + * - k_vrh + - Tuple[float,float] + - Minimum and maximum value in GPa to consider for the Voigt-Reuss-Hill average of the bulk modulus. + * - magnetic_ordering + - Ordering + - Magnetic ordering of the material. + * - material_ids + - List[str] + - List of Materials Project IDs to return data for. + * - n + - Tuple[float,float] + - Minimum and maximum refractive index to consider. + * - num_elements + - Tuple[int,int] + - Minimum and maximum number of elements to consider. + * - num_sites + - Tuple[int,int] + - Minimum and maximum number of sites to consider. + * - num_magnetic_sites + - Tuple[int,int] + - Minimum and maximum number of magnetic sites to consider. + * - num_unique_magnetic_sites + - Tuple[int,int] + - Minimum and maximum number of unique magnetic sites to consider. + * - piezoelectric_modulus + - Tuple[float,float] + - Minimum and maximum piezoelectric modulus to consider. + * - poisson_ratio + - Tuple[float,float] + - Minimum and maximum value to consider for Poisson's ratio. + * - possible_species + - List[str] + - List of element symbols appended with oxidation states. (e.g. Cr2+,O2-) + * - shape_factor + - Tuple[float,float] + - Minimum and maximum shape factor values to consider. + * - spacegroup_number + - int + - Space group number of material. + * - spacegroup_symbol + - str + - Space group symbol of the material in international short symbol notation. + * - surface_energy_anisotropy + - Tuple[float,float] + - Minimum and maximum surface energy anisotropy values to consider. + * - theoretical + - bool + - Whether the material is theoretical. + * - total_energy + - Tuple[int,int] + - Minimum and maximum corrected total energy in eV/atom to consider. 
+ * - total_magnetization + - Tuple[float,float] + - Minimum and maximum total magnetization values to consider. + * - total_magnetization_normalized_formula_units + - Tuple[float,float] + - Minimum and maximum total magnetization values normalized by formula units to consider. + * - total_magnetization_normalized_vol + - Tuple[float,float] + - Minimum and maximum total magnetization values normalized by volume to consider. + * - uncorrected_energy + - Tuple[int,int] + - Minimum and maximum uncorrected total energy in eV/atom to consider. + * - volume + - Tuple[float,float] + - Minimum and maximum volume to consider. + * - weighted_surface_energy + - Tuple[float,float] + - Minimum and maximum weighted surface energy in J/:math:`m^2` to consider. + * - weighted_work_function + - Tuple[float,float] + - Minimum and maximum weighted work function in eV to consider. + +.. +.. .. list-table:: Unsupported search criteria for the properties section +.. :widths: 30 20 60 +.. :header-rows: 1 +.. +.. * - Keyword +.. - Type +.. - Description +.. * - num_chunks +.. - int +.. - Maximum number of chunks of data to yield. None will yield all possible. +.. * - chunk_size +.. - int +.. - Number of data entries per chunk. +.. * - all_fields +.. - bool +.. - Whether to return all fields in the document. Defaults to True. +.. * - fields +.. - List[str] +.. - List of fields in SearchDoc to return data for. Default is material_id if all_fields is False. +.. + + +出力項目 (fields) +---------------------------------------------------------------- + +fields に指定できる項目を以下に列挙します。 + +.. 
code:: text + + band_gap + bandstructure + builder_meta + bulk_modulus + cbm + chemsys + composition + composition_reduced + database_IDs + decomposes_to + density + density_atomic + deprecated + deprecation_reasons + dos + dos_energy_down + dos_energy_up + e_electronic + e_ij_max + e_ionic + e_total + efermi + elements + energy_above_hull + energy_per_atom + equilibrium_reaction_energy_per_atom + es_source_calc_id + formation_energy_per_atom + formula_anonymous + formula_pretty + grain_boundaries + has_props + has_reconstructed + homogeneous_poisson + is_gap_direct + is_magnetic + is_metal + is_stable + last_updated + material_id + n + nelements + nsites + num_magnetic_sites + num_unique_magnetic_sites + ordering + origins + possible_species + property_name + shape_factor + shear_modulus + structure + surface_anisotropy + symmetry + task_ids + theoretical + total_magnetization + total_magnetization_normalized_formula_units + total_magnetization_normalized_vol + types_of_magnetic_species + uncorrected_energy_per_atom + universal_anisotropy + vbm + volume + warnings + weighted_surface_energy + weighted_surface_energy_EV_PER_ANG2 + weighted_work_function + xas diff --git a/docs/ja/source/getcif/command/index.rst b/docs/ja/source/getcif/command/index.rst new file mode 100644 index 0000000..4077042 --- /dev/null +++ b/docs/ja/source/getcif/command/index.rst @@ -0,0 +1,45 @@ +コマンドリファレンス +================================================================ + +getcif +---------------------------------------------------------------- + + 結晶構造データをデータベースから取得する + +書式: + + .. 
code-block:: bash + + getcif [-v][-q] [--dry-run] input_yaml + getcif -h + getcif --version + +説明: + + input_yaml に指定した入力パラメータファイルを読み込み、データベースを検索して結晶構造等のデータを取得します。 + 以下のオプションを受け付けます。 + + - ``-v`` + + 実行時に表示されるメッセージを冗長にします。複数回指定すると冗長度が上がります。 + + - ``-q`` + + 実行時に表示されるメッセージの冗長度を下げます。 ``-v`` の効果を打ち消します。複数回の指定が可能です。 + + - ``--dry-run`` + + 検索パラメータを出力し、データベースへの接続をせずに終了します。検索条件を確認することができます。入力パラメータファイルの ``dry_run`` の指定より優先します。 + + - ``input_yaml`` + + 入力パラメータファイルを指定します。ファイルフォーマットはYAML形式です。 + + - ``-h``, ``--help`` + + ヘルプを表示します。指定可能な検索条件 (properties) と取得データ (fields) の一覧も表示されます。 + + - ``--version`` + + バージョン情報を表示します。 + diff --git a/docs/ja/source/getcif/filespec/index.rst b/docs/ja/source/getcif/filespec/index.rst new file mode 100644 index 0000000..3313c51 --- /dev/null +++ b/docs/ja/source/getcif/filespec/index.rst @@ -0,0 +1,126 @@ +.. _sec-getcif-fileformat: + +====================== +ファイルフォーマット +====================== + +入力パラメータファイル +====================== + +入力パラメータファイルでは、getcif で Materials Project の物質材料データベースから結晶構造等のデータを取得するための設定情報を YAML形式で記述します。本ファイルは以下の部分から構成されます。 + + #. databaseセクション: 接続するデータベースについての情報を記述します。 + + #. optionセクション: 出力先のディレクトリや実行条件などを記述します。 + + #. propertiesセクション: 検索条件を記述します。 + + #. 
fieldsセクション: 取得データの種類を記述します。 + +database +-------------------------------- + + ``target`` + + 接続先のデータベースを指定します。現在はこの項目は無視されます。 + + ``api_key_file`` (デフォルト値: ``materials_project.key``) + + データベースに接続する際の APIキーを格納したファイルのファイル名を指定します。ファイル名の拡張子は ``.key`` とします。 + ファイルが存在しない、または有効なAPIキーが見つからない場合は、環境変数 ``MP_API_KEY`` または pymatgen の設定ファイル ``~/.config/.pmgrc.yaml`` の ``PMG_MAPI_KEY`` からAPIキーを取得します。 + + APIキーファイルはテキスト形式です。 ``#`` から始まる行はコメントとして扱われます。前後の空白は無視されます。複数行からなる場合は最初の有効な行からキーを取得します。 + + +option +-------------------------------- + + ``output_dir`` (デフォルト値: ``""``) + + 取得データを格納するディレクトリを指定します。データは ``output_dir`` 以下に、material ID をディレクトリ名としたディレクトリに出力されます。指定がない場合はカレントディレクトリです。 + + ``dry_run`` (デフォルト値: ``False``) + + データベースへの接続は行わず、検索条件を出力して終了します。検索内容の確認を行うことができます。 + + ``symprec`` (デフォルト値: 0.1) + + 結晶構造データをCIFファイルに出力する際の対称性を判定する許容精度を指定します。デフォルトは 0.1 です。 ``symprec`` に 0.0 を指定した場合は ``symprec`` を指定しないものとして扱い、対称性を考慮しないCIFファイルが生成されます。 + + ``symprec`` は、結晶構造における対称性を判定する際の許容精度(tolerance)を指定するパラメータです。対称性の計算においては、原子位置の微細なずれや数値計算の精度の影響を考慮する必要があります。``symprec`` はこのずれの許容範囲を制御し、対称操作が適用されるかどうかを決定する際の閾値として機能します。 + + ``symprec`` を小さく設定する(例: 0.01)と、対称性の判定がより厳密になり、結晶構造のわずかなずれでも対称操作が適用されない可能性が高まります。その結果、より低い対称性の空間群が得られることがあります。逆に、``symprec`` を大きく設定する(例: 1.0)と、対称性の判定が緩やかになり、わずかなずれが無視され、より高い対称性が認められることがあります。 + + なお、 ``fields`` セクションで ``symmetry`` を指定すると、Materials Project でデフォルトの ``symprec=0.1`` で判定された対称性の情報を取得し、テキストファイル(symmetry)に出力します。 + + +properties +-------------------------------- +検索条件を記述します。 +元素組成や結晶の対称性、物性値の範囲などの項目を、「項目名: 値」の形式で指定します。これらの条件は AND で扱われます。 +指定できる項目は Materials Project の API に定義されていますが、指定方法は mp-api ライブラリの ``materials.summary.search`` のパラメータに準拠します。項目のリストは Appendix を参照してください。また、 ``getcif --help`` で一覧を見ることができます。 + +値の指定方法は次のとおりです。YAML形式に準拠しますが、一部に簡便な記法を用意しています。 + +- 数値、文字列 + + そのまま記述します。 + +- 真偽値 + + true または false を記述します。 + +- 数値や文字列のリスト + + YAML形式の箇条書きおよび ``[ ... ]`` にカンマ区切りで記述するほか、空白区切りで列挙する記法も可能です。例: + + ..
code:: yaml + + elements: Sr Ti + +- 数値の範囲 + + 上限・下限のリストとして ``[ min, max ]`` のように記述するほか、空白区切りで ``min max`` のように記述することもできます。また、以下の記法も可能です。 + + ``<= max`` + max 以下 + + ``< max`` + max より小さい (実数の場合は ``<=`` と同等。整数の場合は ``<= max-1`` として扱われる) + + ``>= min`` + min 以上 + + ``> min`` + min より大きい (実数の場合は ``>=`` と同等。整数の場合は ``>= min+1`` として扱われる) + + ``min ~ max`` + min 以上 max 以下 + + 注記: + + - 記号と数値の間は空白を置きます。 + + - YAML記法では ``>`` は特殊文字として扱われるため、 ``>= min``, ``> min`` はそれぞれ ``">= min"``, ``"> min"`` のように ``" "`` で囲む必要があります。 + + - リストで記述する場合、 ``<= max``, ``>= min`` はそれぞれ ``[ None, max ]``, ``[ min, None ]`` のように表記します。 + +- ワイルドカード + + ``formula`` には元素種にワイルドカード ``*`` を指定できます。その場合は値を ``" "`` で囲みます。例: + .. code:: yaml + + formula: "**O3" + + :math:`ABO_3` 系の物質を指定します。 + +fields +-------------------------------- +取得するデータの種類を記述します。 +項目のリストを YAML形式で列挙するほか、空白区切りの文字列として記述することもできます。文字列は YAML記法 ``|`` を用いて複数行で書くこともできます。 +指定できる項目は Materials Project の API の ``fields`` パラメータに準拠します。項目のリストは Appendix を参照してください。また、 ``getcif --help`` で一覧を見ることができます。 + +``material_id`` と ``formula_pretty`` は暗黙的に取得します。 + +取得したデータは、 ``option`` セクションの ``output_dir`` で指定したディレクトリ内に、物質ごとに ``material_id`` をディレクトリ名とするディレクトリを作成し、その中に格納されます。 +項目ごとに、項目名をファイル名としたファイルに保存されます。但し、結晶構造データ (``structure``) は ``structure.cif`` というファイル名で CIF形式で書き出されます。 diff --git a/docs/ja/source/getcif/index.rst b/docs/ja/source/getcif/index.rst new file mode 100644 index 0000000..550bc75 --- /dev/null +++ b/docs/ja/source/getcif/index.rst @@ -0,0 +1,13 @@ +**************************************************************** +CIFデータ取得ツール (getcif) +**************************************************************** + +.. toctree:: + :maxdepth: 2 + :numbered: 2 + + about/index + tutorial/index + command/index + filespec/index + appendix/index diff --git a/docs/ja/source/getcif/tutorial/index.rst b/docs/ja/source/getcif/tutorial/index.rst new file mode 100644 index 0000000..514c785 --- /dev/null +++ b/docs/ja/source/getcif/tutorial/index.rst @@ -0,0 +1,109 @@ +..
_sec-getcif-tutorial: + +チュートリアル +================================================================ + +結晶構造などのデータを物質材料データベースから取得するツール getcif を使うには、検索条件と取得するデータを記述した入力パラメータファイルを作成し、プログラム getcif を実行します。現在は Materials Project が公開しているデータベースに対応しています。以下では ``docs/tutorial/getcif`` ディレクトリにある ABO3 系の物質を検索・取得するサンプルを例にチュートリアルを実施します。 + +APIキーを取得する +---------------------------------------------------------------- + +Materials Project のデータベースをプログラムから検索するには、あらかじめ Materials Project にユーザ登録し、APIキーを取得する必要があります。 +Materials Project の公式サイト `https://next-gen.materialsproject.org <https://next-gen.materialsproject.org>`_ にアクセスし、Login します。APIキーはユーザ登録時に自動的に生成され、ユーザのダッシュボードから確認できます。取得した APIキーは安全に保管し、他人に知られないようにしましょう。 + +APIキーを getcif にセットするには、以下のいずれかを実行します。 + + (a) pymatgen の設定ファイルに登録する + + .. code:: bash + + $ pmg config --add PMG_MAPI_KEY <API_KEY> + + を実行するか、設定ファイル ``~/.config/.pmgrc.yaml`` に + + .. code:: bash + + PMG_MAPI_KEY: <API_KEY> + + を書き込みます。 + + (b) 環境変数にセットする + + .. code:: bash + + $ MP_API_KEY="<API_KEY>" + $ export MP_API_KEY + + を実行します。 + + (c) ファイルに格納する + + APIキーをファイルに書き込み、getcif を実行するディレクトリに配置します。ファイル名のデフォルトは ``materials_project.key`` です。異なるファイル名を使う場合は、入力パラメータファイル (input.yaml) の api_key_file にファイル名を指定します。ファイル名は ``.key`` の拡張子が必要です。 + + .. code:: yaml + + database: + api_key_file: materials_project.key + + 註: バージョン管理ツールを使っている場合は、 ``.key`` の拡張子を持つファイルを管理から除外するとよいでしょう。(Git の場合は ``.gitignore`` ファイルに ``*.key`` を追加します。) + + +入力パラメータファイルを作成する +---------------------------------------------------------------- + +入力パラメータファイルにはデータベース検索および出力についての設定を記述します。 + +以下に入力パラメータファイルのサンプルを記載します。このファイルは YAML形式のテキストファイルで、データベースへの接続に必要な情報や、検索条件、取得するデータの種類などの内容を記述します。仕様の詳細については :ref:`ファイルフォーマット <sec-getcif-fileformat>` の章を参照してください。 + +YAMLフォーマットでは、 ``keyword: value`` の辞書形式でパラメータを記述します。 ``value`` には数値や文字列などのスカラー値や、複数の値を ``[ ]`` または箇条書きの形式で列挙するリスト型、または辞書型を入れ子にすることができます。検索条件と出力項目については、特別な記法として、リスト型を括弧を使わず空白区切りで列挙する形式でも書くことができます。 + +..
literalinclude:: ../../../../tutorial/getcif/input.yaml + :language: yaml + +入力パラメータファイルは ``database``, ``option``, ``properties``, ``fields`` のブロックからなります。 +``database`` にはデータベース接続に関する設定を記述します。例では ``target`` に Materials Project を指定していますが、現時点ではこの項目は無視されます。その他、 ``api_key_file`` に APIキーを格納したファイルを指定できます。APIキーは pymatgen の設定ファイルや環境変数にセットすることもできます。例では後者の方式を仮定しています。 + +``option`` には実行時のオプションを記述します。 ``output_dir`` は取得したデータの格納先ディレクトリを指定します。省略時にはカレントディレクトリに書き出されます。 ``dry_run`` を ``true`` にセットすると、データベースへの接続はせず、検索条件を出力して終了します。 ``dry_run`` はコマンドラインオプションでも指定できます。 + +``properties`` は検索条件の指定を行います。検索条件を「項目: 値」の書式で列挙し、これらの条件は AND で扱われます。例では、バンドギャップが 1.0以下、安定な絶縁体で、組成式は ABO3 (A, B は任意の元素種)、空間群は ``Pm-3m`` という条件を指定しています。 ``band_gap`` には値の範囲を上限・下限の組で指定するほか、 ``< 1.0`` のような記法も使用できます。検索条件にどのような項目が指定できるかは Appendix を参照してください。 + +``fields`` には出力項目を列挙します。YAMLのリスト形式のほか、空白区切りで項目を列挙する書き方もできます。また、例に示したとおり、YAMLの記法を使って複数行で書くこともできます。 +``structure`` は結晶構造データで、取得したデータはCIF形式で出力されます。 ``band_gap`` はバンドギャップの数値、 ``symmetry`` は対称性の情報です。この他に、 ``material_id`` で Materials Project 内の物質データのインデックスと、 ``formula_pretty`` で組成式が暗黙的に取得されます。出力項目の一覧は Appendix を参照してください。また、getcif コマンドのヘルプメッセージにも一覧が出力されます。 + + +データを取得する +---------------------------------------------------------------- + +入力パラメータファイル(input.yaml)を引数として getcif を実行します。 + +.. code-block:: bash + + $ getcif input.yaml + +``getcif`` を実行すると Materials Project のデータベースに接続し、検索条件に合致するデータを取得します。標準出力には、以下のように、物質の material ID と組成式、データ項目のサマリーが出力されます。 + +.. literalinclude:: ../../../../tutorial/getcif/output_log.txt + :language: text + +取得したデータは、 ``output_dir`` で指定した result ディレクトリ内に物質ごとに格納されます。この例では、 result 以下に mp-3163 から mp-977455 までの 7つのディレクトリが作成され、各ディレクトリには次のファイルが書き込まれます。 + + - band_gap + + バンドギャップの値 + + - formula + + 組成式 (formula_pretty に対応します) + + - structure.cif + + CIF形式の結晶構造データ + + - symmetry + + 対称性に関するデータ + +``getcif`` の実行に ``--dry-run`` オプションを付けると、以下のように検索条件を出力して終了します。データベースに実際に接続する前に検索項目を確認できます。 + +..
literalinclude:: ../../../../tutorial/getcif/output_dryrun.txt + :language: text diff --git a/docs/tutorial/getcif/input.yaml b/docs/tutorial/getcif/input.yaml new file mode 100644 index 0000000..b91430a --- /dev/null +++ b/docs/tutorial/getcif/input.yaml @@ -0,0 +1,18 @@ +database: + target: materials project + +option: + output_dir: result + # dry_run: false + +properties: + band_gap: < 1.0 + is_stable: true + is_metal: false + formula: "**O3" + spacegroup_symbol: Pm-3m + +fields: | + structure + band_gap + symmetry diff --git a/docs/tutorial/getcif/output_dryrun.txt b/docs/tutorial/getcif/output_dryrun.txt new file mode 100644 index 0000000..ab47411 --- /dev/null +++ b/docs/tutorial/getcif/output_dryrun.txt @@ -0,0 +1,2 @@ +$ getcif --dry-run input.yaml +{'band_gap': (None, 1.0), 'is_stable': True, 'is_metal': False, 'formula': '**O3', 'spacegroup_symbol': 'Pm-3m', 'fields': ['structure', 'band_gap', 'symmetry', 'material_id', 'formula_pretty']} diff --git a/docs/tutorial/getcif/output_log.txt b/docs/tutorial/getcif/output_log.txt new file mode 100644 index 0000000..4eae33f --- /dev/null +++ b/docs/tutorial/getcif/output_log.txt @@ -0,0 +1,9 @@ +material_id formula band_gap symmetry formula_pretty +mp-861502 AcFeO3 0.9887999999999995 crystal_system= symbol='Pm-3m' number=221 point_group='m-3m' symprec=0.1 version='2.0.2' AcFeO3 +mp-977455 PaAgO3 0.915 crystal_system= symbol='Pm-3m' number=221 point_group='m-3m' symprec=0.1 version='2.0.2' PaAgO3 +mp-11775 RbUO3 0.45420000000000016 crystal_system= symbol='Pm-3m' number=221 point_group='m-3m' symprec=0.1 version='2.0.2' RbUO3 +mp-3163 BaSnO3 0.37239999999999984 crystal_system= symbol='Pm-3m' number=221 point_group='m-3m' symprec=0.1 version='2.0.2' BaSnO3 +mp-4126 KUO3 0.44540000000000024 crystal_system= symbol='Pm-3m' number=221 point_group='m-3m' symprec=0.1 version='2.0.2' KUO3 +mp-865322 UTlO3 0.27360000000000007 crystal_system= symbol='Pm-3m' number=221 point_group='m-3m' symprec=0.1 version='2.0.2' UTlO3 
+mp-753781 EuHfO3 0.4795999999999996 crystal_system= symbol='Pm-3m' number=221 point_group='m-3m' symprec=0.1 version='2.0.2' EuHfO3 + diff --git a/pyproject.toml b/pyproject.toml index fdd5022..a4174b5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "HTP-tools-cif2x" -version = "1.0.1" +version = "1.1.0" description = "High-ThroughtPut calculation tools" authors = ["HTP-tools developers "] license = "GPL-3.0-or-later" @@ -9,17 +9,21 @@ readme = "README.md" repository = "https://github.com/issp-center-dev/HTP-tools-dev" packages = [ - { include = "cif2x", from = "src" } + { include = "cif2x", from = "src" }, + { include = "getcif", from = "src" } ] [tool.poetry.dependencies] -python = "^3.7" +python = "^3.9" numpy = ">=1.20" pandas = ">=1.4" pymatgen = ">=2019.12.3" f90nml = "^1.4" qe-tools = "^1.1" beautifulsoup4 = "^4" +mp_api = ">=0.40" +phonopy = "*" +"ruamel.yaml" = ">=0.17" [tool.poetry.extras] @@ -27,6 +31,7 @@ beautifulsoup4 = "^4" [tool.poetry.scripts] cif2x = "cif2x.main:main" +getcif = "getcif.main:main" [build-system] requires = ["poetry-core>=1.0.0"] diff --git a/sample/getcif/sample1/input.yaml b/sample/getcif/sample1/input.yaml new file mode 100644 index 0000000..e18fbd5 --- /dev/null +++ b/sample/getcif/sample1/input.yaml @@ -0,0 +1,14 @@ +database: + target: materials project + +option: + output_dir: result + +properties: + elements: [ Si, O ] + band_gap: [ 0.5, 0.6 ] + +fields: | + structure + band_gap + symmetry diff --git a/sample/getcif/sample1/run_log.txt b/sample/getcif/sample1/run_log.txt new file mode 100644 index 0000000..db69ef2 --- /dev/null +++ b/sample/getcif/sample1/run_log.txt @@ -0,0 +1,86 @@ +material_id formula band_gap symmetry +mp-720216 Ca3Y9Al18CrSiO48 0.5981999999999998 crystal_system= symbol='P1' number=1 point_group='1' symprec=0.1 version='2.0.2' +mp-1046697 CrSiO5 0.5381 crystal_system= symbol='Cc' number=9 point_group='m' symprec=0.1 version='2.0.2' +mp-558986 CaMn6SiO12 
0.5927000000000002 crystal_system= symbol='I4_1/acd' number=142 point_group='4/mmm' symprec=0.1 version='2.0.2' +mp-2714391 Na2Mg2SiSn2S5O24 0.5030000000000001 crystal_system= symbol='P1' number=1 point_group='1' symprec=0.1 version='2.0.2' +mp-1036730 Mg30SiSnO32 0.5617999999999999 crystal_system= symbol='P4/mmm' number=123 point_group='4/mmm' symprec=0.1 version='2.0.2' +mp-2714230 Na2Mg4SiGe4S11O48 0.5554000000000001 crystal_system= symbol='P1' number=1 point_group='1' symprec=0.1 version='2.0.2' +mp-2714233 Na2Mg4SiSn4S11O48 0.5614999999999997 crystal_system= symbol='P1' number=1 point_group='1' symprec=0.1 version='2.0.2' +mp-805388 LiFeSiO4 0.5118 crystal_system= symbol='Pc' number=7 point_group='m' symprec=0.1 version='2.0.2' +mp-1265992 ZnCrSiO5 0.5385999999999997 crystal_system= symbol='P-1' number=2 point_group='-1' symprec=0.1 version='2.0.2' +mp-554498 SiO2 0.5029000000000001 crystal_system= symbol='P622' number=177 point_group='622' symprec=0.1 version='2.0.2' +mp-1210331 Na2PrSiO5 0.5707000000000002 crystal_system= symbol='Pnma' number=62 point_group='mmm' symprec=0.1 version='2.0.2' +mp-2712920 MgSiSn3S5O24 0.5127000000000002 crystal_system= symbol='P1' number=1 point_group='1' symprec=0.1 version='2.0.2' +mp-1189398 KUSiO7 0.536 crystal_system= symbol='P2_1' number=4 point_group='2' symprec=0.1 version='2.0.2' +mp-755733 Li2MnSiO5 0.5558000000000001 crystal_system= symbol='P4/nmm' number=129 point_group='4/mmm' symprec=0.1 version='2.0.2' +mp-758206 Fe35SiO48 0.5619000000000001 crystal_system= symbol='Cm' number=8 point_group='m' symprec=0.1 version='2.0.2' +mp-2712557 NaYSiGe(SO6)2 0.5490999999999999 crystal_system= symbol='Cc' number=9 point_group='m' symprec=0.1 version='2.0.2' +mp-1034271 Mg14SiSnO16 0.5737999999999985 crystal_system= symbol='P4/mmm' number=123 point_group='4/mmm' symprec=0.1 version='2.0.2' +mp-743965 SiH12N2(O2F3)2 0.5504 crystal_system= symbol='C2/c' number=15 point_group='2/m' symprec=0.1 version='2.0.2' +mp-1034860 
Mg14ZnSiO16 0.5541000000000009 crystal_system= symbol='Pmmm' number=47 point_group='mmm' symprec=0.1 version='2.0.2' +mp-2714334 Na2Ca2SiSn2S5O24 0.5984 crystal_system= symbol='P1' number=1 point_group='1' symprec=0.1 version='2.0.2' +mp-1041877 ZnSi2WO6 0.5735999999999999 crystal_system= symbol='Pbca' number=61 point_group='mmm' symprec=0.1 version='2.0.2' +mp-760971 Li3Fe2(SiO4)2 0.5579000000000001 crystal_system= symbol='Pc' number=7 point_group='m' symprec=0.1 version='2.0.2' +mp-760462 Li5Ti(SiO4)2 0.5455000000000005 crystal_system= symbol='C2' number=5 point_group='2' symprec=0.1 version='2.0.2' +mp-541023 CaFe3Si2HO9 0.5066999999999999 crystal_system= symbol='Pnma' number=62 point_group='mmm' symprec=0.1 version='2.0.2' +mp-861554 Li5Mn(SiO4)2 0.5485999999999999 crystal_system= symbol='C2' number=5 point_group='2' symprec=0.1 version='2.0.2' +mp-1198051 K2Al2Si2O9 0.5306000000000001 crystal_system= symbol='P2_12_12_1' number=19 point_group='222' symprec=0.1 version='2.0.2' +mp-1197843 Ca5(SiO5)2 0.5149 crystal_system= symbol='P2_1/c' number=14 point_group='2/m' symprec=0.1 version='2.0.2' +mp-752894 Li3Co2(SiO4)2 0.5112000000000001 crystal_system= symbol='C2' number=5 point_group='2' symprec=0.1 version='2.0.2' +mp-1256235 Al4V13Si2(SbO14)2 0.5661000000000005 crystal_system= symbol='P-1' number=2 point_group='-1' symprec=0.1 version='2.0.2' +mp-560675 K6Cu(SiO4)2 0.5214999999999999 crystal_system= symbol='P-1' number=2 point_group='-1' symprec=0.1 version='2.0.2' +mp-764102 Li3Fe2(SiO4)2 0.5410999999999997 crystal_system= symbol='P1' number=1 point_group='1' symprec=0.1 version='2.0.2' +mp-1640236 Li7Si2(NiO4)3 0.5914999999999999 crystal_system= symbol='C2' number=5 point_group='2' symprec=0.1 version='2.0.2' +mp-561495 Cs4CuSi2O7 0.5831 crystal_system= symbol='P-1' number=2 point_group='-1' symprec=0.1 version='2.0.2' +mp-725197 Mg3Si2O9 0.5438000000000001 crystal_system= symbol='Cm' number=8 point_group='m' symprec=0.1 version='2.0.2' +mp-1211926 
Rb2Ta2Si2O11 0.5166999999999999 crystal_system= symbol='P4_122' number=91 point_group='422' symprec=0.1 version='2.0.2' +mp-560631 KUSi2O7 0.5523000000000002 crystal_system= symbol='P4/mbm' number=127 point_group='4/mmm' symprec=0.1 version='2.0.2' +mp-859799 Li9Si2Ni5O16 0.5962999999999998 crystal_system= symbol='P-1' number=2 point_group='-1' symprec=0.1 version='2.0.2' +mp-764516 Li3Co2(SiO4)2 0.5178 crystal_system= symbol='P1' number=1 point_group='1' symprec=0.1 version='2.0.2' +mp-1177732 Li3Co2(SiO4)2 0.5561 crystal_system= symbol='P1' number=1 point_group='1' symprec=0.1 version='2.0.2' +mp-558320 K3U3Si2O13 0.5497000000000005 crystal_system= symbol='P-62c' number=190 point_group='-6m2' symprec=0.1 version='2.0.2' +mp-1202769 Rb2Nb2Si2O11 0.5739000000000001 crystal_system= symbol='P4_322' number=95 point_group='422' symprec=0.1 version='2.0.2' +mp-1212196 Rb2Nb2Si2O11 0.5318 crystal_system= symbol='P4_122' number=91 point_group='422' symprec=0.1 version='2.0.2' +mp-1044098 Si2Bi13(SbO14)2 0.5369000000000002 crystal_system= symbol='P-1' number=2 point_group='-1' symprec=0.1 version='2.0.2' +mp-1196415 Li2Al2(SiO4)3 0.5507 crystal_system= symbol='Fdd2' number=43 point_group='mm2' symprec=0.1 version='2.0.2' +mp-1250467 Ca2Cr2(SiO4)3 0.5882000000000001 crystal_system= symbol='I4_1/acd' number=142 point_group='4/mmm' symprec=0.1 version='2.0.2' +mp-2716120 Na6Zn2Si3Sn2(SO8)3 0.5551999999999997 crystal_system= symbol='P1' number=1 point_group='1' symprec=0.1 version='2.0.2' +mp-1196695 Na2Al2(SiO4)3 0.5721 crystal_system= symbol='Fdd2' number=43 point_group='mm2' symprec=0.1 version='2.0.2' +mp-2714208 Na6Mg16Ta8Si3(S11O48)3 0.5320999999999998 crystal_system= symbol='P1' number=1 point_group='1' symprec=0.1 version='2.0.2' +mp-778793 Li4Fe3(SiO4)3 0.5446 crystal_system= symbol='P1' number=1 point_group='1' symprec=0.1 version='2.0.2' +mp-2713803 Na2Nb2In2Si3(SO8)3 0.5621999999999998 crystal_system= symbol='P1' number=1 point_group='1' symprec=0.1 version='2.0.2' 
+mp-1202493 CaAl2Si3O13 0.5198 crystal_system= symbol='Cc' number=9 point_group='m' symprec=0.1 version='2.0.2' +mp-1228625 BaAl2Si3O14 0.538 crystal_system= symbol='P2_12_12' number=18 point_group='222' symprec=0.1 version='2.0.2' +mp-2713905 Na2Sc2Nb2Si3(SO8)3 0.5029000000000001 crystal_system= symbol='P1' number=1 point_group='1' symprec=0.1 version='2.0.2' +mp-1173755 Na4Al3Si3B(O3F)4 0.532 crystal_system= symbol='P1' number=1 point_group='1' symprec=0.1 version='2.0.2' +mp-696360 Ca2Cu2Si3(HO3)4 0.5940999999999996 crystal_system= symbol='P2_1/m' number=11 point_group='2/m' symprec=0.1 version='2.0.2' +mp-1211776 KTiSi3O11 0.5903 crystal_system= symbol='P2_1/c' number=14 point_group='2/m' symprec=0.1 version='2.0.2' +mp-2713833 Na6Mg4Si3Sn4(S3O16)3 0.5345999999999997 crystal_system= symbol='P1' number=1 point_group='1' symprec=0.1 version='2.0.2' +mp-2713586 Na18Ca20Nb4Si3(S11O48)3 0.5026999999999999 crystal_system= symbol='P1' number=1 point_group='1' symprec=0.1 version='2.0.2' +mp-1182601 Ca2Cu2Si3(HO3)4 0.5920000000000001 crystal_system= symbol='P2_1/m' number=11 point_group='2/m' symprec=0.1 version='2.0.2' +mp-1173811 Na4Al3Si3NO15 0.5025 crystal_system= symbol='P1' number=1 point_group='1' symprec=0.1 version='2.0.2' +mp-1172462 Cr2(SiO4)3 0.5603 crystal_system= symbol='Ia-3d' number=230 point_group='m-3m' symprec=0.1 version='2.0.2' +mp-2230766 MgMn2(SiO3)4 0.5550999999999999 crystal_system= symbol='C2' number=5 point_group='2' symprec=0.1 version='2.0.2' +mp-1233228 MgSc4(Si2O7)2 0.5823 crystal_system= symbol='R3m' number=160 point_group='3m' symprec=0.1 version='2.0.2' +mp-734012 K2LuSi4O11 0.5249 crystal_system= symbol='P2_1/m' number=11 point_group='2/m' symprec=0.1 version='2.0.2' +mp-1266531 Ca3Si4(MoO7)2 0.5236000000000001 crystal_system= symbol='C2/c' number=15 point_group='2/m' symprec=0.1 version='2.0.2' +mp-1235393 Sr2LiV2(Si2O7)2 0.5660999999999996 crystal_system= symbol='Pm' number=6 point_group='m' symprec=0.1 version='2.0.2' +mp-1263117 
Zn3Si4(AgO7)2 0.5735999999999999 crystal_system= symbol='C2/c' number=15 point_group='2/m' symprec=0.1 version='2.0.2' +mp-758359 Li2CrSi4O11 0.5257999999999998 crystal_system= symbol='I4/m' number=87 point_group='4/m' symprec=0.1 version='2.0.2' +mp-867649 LiTi(Si2O5)2 0.5326999999999993 crystal_system= symbol='Pnc2' number=30 point_group='mm2' symprec=0.1 version='2.0.2' +mp-1211221 KSi4O9 0.5781000000000001 crystal_system= symbol='P6/mmm' number=191 point_group='6/mmm' symprec=0.1 version='2.0.2' +mp-1214160 Ca2Al(SiO3)4 0.5954000000000002 crystal_system= symbol='Pmna' number=53 point_group='mmm' symprec=0.1 version='2.0.2' +mp-2716089 Na6Mg2Si5Ge6S7O48 0.5599000000000003 crystal_system= symbol='P1' number=1 point_group='1' symprec=0.1 version='2.0.2' +mp-2716074 Na14Mg6Si5Ge2S7O48 0.5142000000000002 crystal_system= symbol='P1' number=1 point_group='1' symprec=0.1 version='2.0.2' +mp-1203161 Ca10Si5(ClO10)2 0.5481000000000003 crystal_system= symbol='C2/c' number=15 point_group='2/m' symprec=0.1 version='2.0.2' +mp-1211795 KNaSi5SnW3O13 0.5925999999999996 crystal_system= symbol='C2/m' number=12 point_group='2/m' symprec=0.1 version='2.0.2' +mp-1202111 CaAl2(Si3O10)2 0.5963 crystal_system= symbol='P2_1' number=4 point_group='2' symprec=0.1 version='2.0.2' +mp-1195621 Rb2U(Si2O5)3 0.5691999999999995 crystal_system= symbol='C2/c' number=15 point_group='2/m' symprec=0.1 version='2.0.2' +mp-1214355 Ca(SiO2)8 0.5343 crystal_system= symbol='Pc' number=7 point_group='m' symprec=0.1 version='2.0.2' +mp-1227550 Ca4MnFe11Si8(HO9)4 0.5691000000000002 crystal_system= symbol='Pm' number=6 point_group='m' symprec=0.1 version='2.0.2' +mp-1200158 K2BaNa2Ti4(SiO4)8 0.5992 crystal_system= symbol='Cm' number=8 point_group='m' symprec=0.1 version='2.0.2' +mp-2716209 Na18Nb4Zn8Si9(SO8)9 0.5216000000000001 crystal_system= symbol='P1' number=1 point_group='1' symprec=0.1 version='2.0.2' +mp-1214270 Ca4Be2Al2Si9O28 0.5991 crystal_system= symbol='Cmcm' number=63 point_group='mmm' 
symprec=0.1 version='2.0.2' +mp-1224568 K2Mn5Si12O31 0.5683999999999998 crystal_system= symbol='P-62c' number=190 point_group='-6m2' symprec=0.1 version='2.0.2' +mp-686232 Na7Al11Si13(AgO12)4 0.5483 crystal_system= symbol='P1' number=1 point_group='1' symprec=0.1 version='2.0.2' + diff --git a/sample/getcif/sample2/input.yaml b/sample/getcif/sample2/input.yaml new file mode 100644 index 0000000..1753519 --- /dev/null +++ b/sample/getcif/sample2/input.yaml @@ -0,0 +1,17 @@ +database: + target: materials project + +option: + output_dir: result + +properties: + band_gap: < 1.0 + is_stable: true + is_metal: false + formula: "**O3" + spacegroup_symbol: Pm-3m + +fields: | + structure + band_gap + dos diff --git a/sample/getcif/sample2/run_log.txt b/sample/getcif/sample2/run_log.txt new file mode 100644 index 0000000..9544c56 --- /dev/null +++ b/sample/getcif/sample2/run_log.txt @@ -0,0 +1,9 @@ +material_id formula band_gap dos +mp-861502 AcFeO3 0.9887999999999995 total={'1': DosSummaryData(task_id=MPID(mp-1759043), band_gap=2.3644999999999996, cbm=8.8903, vbm=6.5258, efermi=6.57184844, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-1759043), band_gap=1.9184, cbm=7.3735, vbm=5.4551, efermi=6.57184844, spin_polarization=None)} elemental={Element Ac: {'total': {'1': DosSummaryData(task_id=MPID(mp-1759043), band_gap=2.4090999999999996, cbm=8.8903, vbm=6.4812, efermi=6.57184844, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-1759043), band_gap=1.963, cbm=7.4181, vbm=5.4551, efermi=6.57184844, spin_polarization=None)}, 's': {'1': DosSummaryData(task_id=MPID(mp-1759043), band_gap=3.3459000000000003, cbm=9.2918, vbm=5.9459, efermi=6.57184844, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-1759043), band_gap=2.766, cbm=7.8196, vbm=5.0536, efermi=6.57184844, spin_polarization=None)}, 'p': {'1': DosSummaryData(task_id=MPID(mp-1759043), band_gap=2.8552, cbm=9.2918, vbm=6.4366, efermi=6.57184844, spin_polarization=None), '-1': 
DosSummaryData(task_id=MPID(mp-1759043), band_gap=2.4091000000000005, cbm=7.8642, vbm=5.4551, efermi=6.57184844, spin_polarization=None)}, 'd': {'1': DosSummaryData(task_id=MPID(mp-1759043), band_gap=2.4536999999999995, cbm=8.8903, vbm=6.4366, efermi=6.57184844, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-1759043), band_gap=2.0522, cbm=7.4181, vbm=5.3659, efermi=6.57184844, spin_polarization=None)}, 'f': {'1': DosSummaryData(task_id=MPID(mp-1759043), band_gap=2.8106, cbm=9.2918, vbm=6.4812, efermi=6.57184844, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-1759043), band_gap=2.0076, cbm=7.4181, vbm=5.4105, efermi=6.57184844, spin_polarization=None)}}, Element Fe: {'total': {'1': DosSummaryData(task_id=MPID(mp-1759043), band_gap=3.0783000000000005, cbm=9.6041, vbm=6.5258, efermi=6.57184844, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-1759043), band_gap=1.963, cbm=7.3735, vbm=5.4105, efermi=6.57184844, spin_polarization=None)}, 's': {'1': DosSummaryData(task_id=MPID(mp-1759043), band_gap=4.2828, cbm=10.3179, vbm=6.0351, efermi=6.57184844, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-1759043), band_gap=2.8998, cbm=7.8196, vbm=4.9198, efermi=6.57184844, spin_polarization=None)}, 'p': {'1': DosSummaryData(task_id=MPID(mp-1759043), band_gap=4.193699999999999, cbm=10.6749, vbm=6.4812, efermi=6.57184844, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-1759043), band_gap=2.4537000000000004, cbm=7.8642, vbm=5.4105, efermi=6.57184844, spin_polarization=None)}, 'd': {'1': DosSummaryData(task_id=MPID(mp-1759043), band_gap=3.0783000000000005, cbm=9.6041, vbm=6.5258, efermi=6.57184844, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-1759043), band_gap=2.0522, cbm=7.3735, vbm=5.3213, efermi=6.57184844, spin_polarization=None)}, 'f': {'1': DosSummaryData(task_id=MPID(mp-1759043), band_gap=89.2261, cbm=33.7844, vbm=-55.4417, efermi=6.57184844, spin_polarization=None), '-1': 
DosSummaryData(task_id=MPID(mp-1759043), band_gap=89.2261, cbm=33.7844, vbm=-55.4417, efermi=6.57184844, spin_polarization=None)}}, Element O: {'total': {'1': DosSummaryData(task_id=MPID(mp-1759043), band_gap=2.4091000000000005, cbm=8.9349, vbm=6.5258, efermi=6.57184844, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-1759043), band_gap=2.1861000000000006, cbm=7.6412, vbm=5.4551, efermi=6.57184844, spin_polarization=None)}, 's': {'1': DosSummaryData(task_id=MPID(mp-1759043), band_gap=2.4537000000000004, cbm=8.9349, vbm=6.4812, efermi=6.57184844, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-1759043), band_gap=2.4983000000000004, cbm=7.8196, vbm=5.3213, efermi=6.57184844, spin_polarization=None)}, 'p': {'1': DosSummaryData(task_id=MPID(mp-1759043), band_gap=2.766, cbm=9.2918, vbm=6.5258, efermi=6.57184844, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-1759043), band_gap=2.1861000000000006, cbm=7.6412, vbm=5.4551, efermi=6.57184844, spin_polarization=None)}, 'd': {'1': DosSummaryData(task_id=MPID(mp-1759043), band_gap=89.2261, cbm=33.7844, vbm=-55.4417, efermi=6.57184844, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-1759043), band_gap=89.2261, cbm=33.7844, vbm=-55.4417, efermi=6.57184844, spin_polarization=None)}, 'f': {'1': DosSummaryData(task_id=MPID(mp-1759043), band_gap=89.2261, cbm=33.7844, vbm=-55.4417, efermi=6.57184844, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-1759043), band_gap=89.2261, cbm=33.7844, vbm=-55.4417, efermi=6.57184844, spin_polarization=None)}}} orbital={'s': {'1': DosSummaryData(task_id=MPID(mp-1759043), band_gap=2.4537000000000004, cbm=8.9349, vbm=6.4812, efermi=6.57184844, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-1759043), band_gap=2.4983000000000004, cbm=7.8196, vbm=5.3213, efermi=6.57184844, spin_polarization=None)}, 'p': {'1': DosSummaryData(task_id=MPID(mp-1759043), band_gap=2.766, cbm=9.2918, vbm=6.5258, efermi=6.57184844, 
spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-1759043), band_gap=2.1861000000000006, cbm=7.6412, vbm=5.4551, efermi=6.57184844, spin_polarization=None)}, 'd': {'1': DosSummaryData(task_id=MPID(mp-1759043), band_gap=2.3644999999999996, cbm=8.8903, vbm=6.5258, efermi=6.57184844, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-1759043), band_gap=2.0076, cbm=7.3735, vbm=5.3659, efermi=6.57184844, spin_polarization=None)}} magnetic_ordering='FM' +mp-977455 PaAgO3 0.915 total={'1': DosSummaryData(task_id=MPID(mp-1712701), band_gap=1.4684999999999997, cbm=4.1765, vbm=2.708, efermi=2.69089079, spin_polarization=None)} elemental={Element Pa: {'total': {'1': DosSummaryData(task_id=MPID(mp-1712701), band_gap=1.4684999999999997, cbm=4.1765, vbm=2.708, efermi=2.69089079, spin_polarization=None)}, 's': {'1': DosSummaryData(task_id=MPID(mp-1712701), band_gap=1.4684999999999997, cbm=4.1765, vbm=2.708, efermi=2.69089079, spin_polarization=None)}, 'p': {'1': DosSummaryData(task_id=MPID(mp-1712701), band_gap=2.4931, cbm=5.2011, vbm=2.708, efermi=2.69089079, spin_polarization=None)}, 'd': {'1': DosSummaryData(task_id=MPID(mp-1712701), band_gap=1.4684999999999997, cbm=4.1765, vbm=2.708, efermi=2.69089079, spin_polarization=None)}, 'f': {'1': DosSummaryData(task_id=MPID(mp-1712701), band_gap=1.4684999999999997, cbm=4.1765, vbm=2.708, efermi=2.69089079, spin_polarization=None)}}, Element Ag: {'total': {'1': DosSummaryData(task_id=MPID(mp-1712701), band_gap=1.4684999999999997, cbm=4.1765, vbm=2.708, efermi=2.69089079, spin_polarization=None)}, 's': {'1': DosSummaryData(task_id=MPID(mp-1712701), band_gap=1.4684999999999997, cbm=4.1765, vbm=2.708, efermi=2.69089079, spin_polarization=None)}, 'p': {'1': DosSummaryData(task_id=MPID(mp-1712701), band_gap=1.4684999999999997, cbm=4.1765, vbm=2.708, efermi=2.69089079, spin_polarization=None)}, 'd': {'1': DosSummaryData(task_id=MPID(mp-1712701), band_gap=1.6392999999999995, cbm=4.3473, vbm=2.708, efermi=2.69089079, 
spin_polarization=None)}, 'f': {'1': DosSummaryData(task_id=MPID(mp-1712701), band_gap=68.30590000000001, cbm=28.7325, vbm=-39.5734, efermi=2.69089079, spin_polarization=None)}}, Element O: {'total': {'1': DosSummaryData(task_id=MPID(mp-1712701), band_gap=1.4684999999999997, cbm=4.1765, vbm=2.708, efermi=2.69089079, spin_polarization=None)}, 's': {'1': DosSummaryData(task_id=MPID(mp-1712701), band_gap=1.4684999999999997, cbm=4.1765, vbm=2.708, efermi=2.69089079, spin_polarization=None)}, 'p': {'1': DosSummaryData(task_id=MPID(mp-1712701), band_gap=1.4684999999999997, cbm=4.1765, vbm=2.708, efermi=2.69089079, spin_polarization=None)}, 'd': {'1': DosSummaryData(task_id=MPID(mp-1712701), band_gap=68.30590000000001, cbm=28.7325, vbm=-39.5734, efermi=2.69089079, spin_polarization=None)}, 'f': {'1': DosSummaryData(task_id=MPID(mp-1712701), band_gap=68.30590000000001, cbm=28.7325, vbm=-39.5734, efermi=2.69089079, spin_polarization=None)}}} orbital={'s': {'1': DosSummaryData(task_id=MPID(mp-1712701), band_gap=1.4684999999999997, cbm=4.1765, vbm=2.708, efermi=2.69089079, spin_polarization=None)}, 'p': {'1': DosSummaryData(task_id=MPID(mp-1712701), band_gap=1.4684999999999997, cbm=4.1765, vbm=2.708, efermi=2.69089079, spin_polarization=None)}, 'd': {'1': DosSummaryData(task_id=MPID(mp-1712701), band_gap=1.6052, cbm=4.3132, vbm=2.708, efermi=2.69089079, spin_polarization=None)}} magnetic_ordering='NM' +mp-11775 RbUO3 0.45420000000000016 total={'1': DosSummaryData(task_id=MPID(mp-2294171), band_gap=0.45279999999999987, cbm=5.3203, vbm=4.8675, efermi=4.87461637, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-2294171), band_gap=2.8864, cbm=5.7165, vbm=2.8301, efermi=4.87461637, spin_polarization=None)} elemental={Element Rb: {'total': {'1': DosSummaryData(task_id=MPID(mp-2294171), band_gap=0.45279999999999987, cbm=5.3203, vbm=4.8675, efermi=4.87461637, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-2294171), band_gap=2.8864, cbm=5.7165, vbm=2.8301, 
efermi=4.87461637, spin_polarization=None)}, 's': {'1': DosSummaryData(task_id=MPID(mp-2294171), band_gap=1.33, cbm=6.1692, vbm=4.8392, efermi=4.87461637, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-2294171), band_gap=3.3392, cbm=5.9429, vbm=2.6037, efermi=4.87461637, spin_polarization=None)}, 'p': {'1': DosSummaryData(task_id=MPID(mp-2294171), band_gap=0.45279999999999987, cbm=5.3203, vbm=4.8675, efermi=4.87461637, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-2294171), band_gap=2.8864, cbm=5.7165, vbm=2.8301, efermi=4.87461637, spin_polarization=None)}, 'd': {'1': DosSummaryData(task_id=MPID(mp-2294171), band_gap=0.45279999999999987, cbm=5.3203, vbm=4.8675, efermi=4.87461637, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-2294171), band_gap=2.9713, cbm=5.7165, vbm=2.7452, efermi=4.87461637, spin_polarization=None)}, 'f': {'1': DosSummaryData(task_id=MPID(mp-2294171), band_gap=56.595600000000005, cbm=15.3943, vbm=-41.2013, efermi=4.87461637, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-2294171), band_gap=56.595600000000005, cbm=15.3943, vbm=-41.2013, efermi=4.87461637, spin_polarization=None)}}, Element U: {'total': {'1': DosSummaryData(task_id=MPID(mp-2294171), band_gap=0.45279999999999987, cbm=5.3203, vbm=4.8675, efermi=4.87461637, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-2294171), band_gap=2.8864, cbm=5.7165, vbm=2.8301, efermi=4.87461637, spin_polarization=None)}, 's': {'1': DosSummaryData(task_id=MPID(mp-2294171), band_gap=2.66, cbm=5.3486, vbm=2.6886, efermi=4.87461637, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-2294171), band_gap=2.9430000000000005, cbm=5.7731, vbm=2.8301, efermi=4.87461637, spin_polarization=None)}, 'p': {'1': DosSummaryData(task_id=MPID(mp-2294171), band_gap=2.6317000000000004, cbm=5.3486, vbm=2.7169, efermi=4.87461637, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-2294171), band_gap=2.9147, cbm=5.7448, vbm=2.8301, 
efermi=4.87461637, spin_polarization=None)}, 'd': {'1': DosSummaryData(task_id=MPID(mp-2294171), band_gap=0.45279999999999987, cbm=5.3203, vbm=4.8675, efermi=4.87461637, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-2294171), band_gap=2.8864, cbm=5.7165, vbm=2.8301, efermi=4.87461637, spin_polarization=None)}, 'f': {'1': DosSummaryData(task_id=MPID(mp-2294171), band_gap=0.45279999999999987, cbm=5.3203, vbm=4.8675, efermi=4.87461637, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-2294171), band_gap=2.8864, cbm=5.7165, vbm=2.8301, efermi=4.87461637, spin_polarization=None)}}, Element O: {'total': {'1': DosSummaryData(task_id=MPID(mp-2294171), band_gap=0.48109999999999964, cbm=5.3203, vbm=4.8392, efermi=4.87461637, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-2294171), band_gap=2.8864, cbm=5.7165, vbm=2.8301, efermi=4.87461637, spin_polarization=None)}, 's': {'1': DosSummaryData(task_id=MPID(mp-2294171), band_gap=0.5094000000000003, cbm=5.3486, vbm=4.8392, efermi=4.87461637, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-2294171), band_gap=2.9713, cbm=5.7448, vbm=2.7735, efermi=4.87461637, spin_polarization=None)}, 'p': {'1': DosSummaryData(task_id=MPID(mp-2294171), band_gap=0.48109999999999964, cbm=5.3203, vbm=4.8392, efermi=4.87461637, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-2294171), band_gap=2.8864, cbm=5.7165, vbm=2.8301, efermi=4.87461637, spin_polarization=None)}, 'd': {'1': DosSummaryData(task_id=MPID(mp-2294171), band_gap=56.595600000000005, cbm=15.3943, vbm=-41.2013, efermi=4.87461637, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-2294171), band_gap=56.595600000000005, cbm=15.3943, vbm=-41.2013, efermi=4.87461637, spin_polarization=None)}, 'f': {'1': DosSummaryData(task_id=MPID(mp-2294171), band_gap=56.595600000000005, cbm=15.3943, vbm=-41.2013, efermi=4.87461637, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-2294171), 
band_gap=56.595600000000005, cbm=15.3943, vbm=-41.2013, efermi=4.87461637, spin_polarization=None)}}} orbital={'s': {'1': DosSummaryData(task_id=MPID(mp-2294171), band_gap=0.5094000000000003, cbm=5.3486, vbm=4.8392, efermi=4.87461637, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-2294171), band_gap=2.9147, cbm=5.7448, vbm=2.8301, efermi=4.87461637, spin_polarization=None)}, 'p': {'1': DosSummaryData(task_id=MPID(mp-2294171), band_gap=0.45279999999999987, cbm=5.3203, vbm=4.8675, efermi=4.87461637, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-2294171), band_gap=2.8864, cbm=5.7165, vbm=2.8301, efermi=4.87461637, spin_polarization=None)}, 'd': {'1': DosSummaryData(task_id=MPID(mp-2294171), band_gap=0.45279999999999987, cbm=5.3203, vbm=4.8675, efermi=4.87461637, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-2294171), band_gap=2.8864, cbm=5.7165, vbm=2.8301, efermi=4.87461637, spin_polarization=None)}} magnetic_ordering='FM' +mp-3163 BaSnO3 0.37239999999999984 total={'1': DosSummaryData(task_id=MPID(mp-2298219), band_gap=0.4378000000000002, cbm=4.5096, vbm=4.0718, efermi=4.0536049, spin_polarization=None)} elemental={Element Ba: {'total': {'1': DosSummaryData(task_id=MPID(mp-2298219), band_gap=1.7509999999999994, cbm=5.7985, vbm=4.0475, efermi=4.0536049, spin_polarization=None)}, 's': {'1': DosSummaryData(task_id=MPID(mp-2298219), band_gap=1.2889000000000004, cbm=5.0689, vbm=3.78, efermi=4.0536049, spin_polarization=None)}, 'p': {'1': DosSummaryData(task_id=MPID(mp-2298219), band_gap=1.6051000000000002, cbm=5.6283, vbm=4.0232, efermi=4.0536049, spin_polarization=None)}, 'd': {'1': DosSummaryData(task_id=MPID(mp-2298219), band_gap=3.1615, cbm=7.2333, vbm=4.0718, efermi=4.0536049, spin_polarization=None)}}, Element Sn: {'total': {'1': DosSummaryData(task_id=MPID(mp-2298219), band_gap=0.4620999999999995, cbm=4.4853, vbm=4.0232, efermi=4.0536049, spin_polarization=None)}, 's': {'1': DosSummaryData(task_id=MPID(mp-2298219), 
band_gap=0.6079000000000003, cbm=4.4366, vbm=3.8287, efermi=4.0536049, spin_polarization=None)}, 'p': {'1': DosSummaryData(task_id=MPID(mp-2298219), band_gap=0.5594000000000001, cbm=4.6069, vbm=4.0475, efermi=4.0536049, spin_polarization=None)}, 'd': {'1': DosSummaryData(task_id=MPID(mp-2298219), band_gap=1.678, cbm=5.7012, vbm=4.0232, efermi=4.0536049, spin_polarization=None)}}, Element O: {'total': {'1': DosSummaryData(task_id=MPID(mp-2298219), band_gap=0.4378000000000002, cbm=4.5096, vbm=4.0718, efermi=4.0536049, spin_polarization=None)}, 's': {'1': DosSummaryData(task_id=MPID(mp-2298219), band_gap=0.4377999999999993, cbm=4.4853, vbm=4.0475, efermi=4.0536049, spin_polarization=None)}, 'p': {'1': DosSummaryData(task_id=MPID(mp-2298219), band_gap=0.4864000000000006, cbm=4.5582, vbm=4.0718, efermi=4.0536049, spin_polarization=None)}, 'd': {'1': DosSummaryData(task_id=MPID(mp-2298219), band_gap=48.637699999999995, cbm=25.4481, vbm=-23.1896, efermi=4.0536049, spin_polarization=None)}}} orbital={'s': {'1': DosSummaryData(task_id=MPID(mp-2298219), band_gap=0.4133999999999993, cbm=4.4609, vbm=4.0475, efermi=4.0536049, spin_polarization=None)}, 'p': {'1': DosSummaryData(task_id=MPID(mp-2298219), band_gap=0.5106999999999999, cbm=4.5825, vbm=4.0718, efermi=4.0536049, spin_polarization=None)}, 'd': {'1': DosSummaryData(task_id=MPID(mp-2298219), band_gap=1.8239, cbm=5.8714, vbm=4.0475, efermi=4.0536049, spin_polarization=None)}} magnetic_ordering='NM' +mp-4126 KUO3 0.44540000000000024 total={'1': DosSummaryData(task_id=MPID(mp-1695096), band_gap=0.45199999999999996, cbm=5.3, vbm=4.848, efermi=4.82537501, spin_polarization=1.0), '-1': DosSummaryData(task_id=MPID(mp-1695096), band_gap=3.1189999999999998, cbm=5.6616, vbm=2.5426, efermi=4.82537501, spin_polarization=1.0)} elemental={Element K: {'total': {'1': DosSummaryData(task_id=MPID(mp-1695096), band_gap=0.45199999999999996, cbm=5.3, vbm=4.848, efermi=4.82537501, spin_polarization=None), '-1': 
DosSummaryData(task_id=MPID(mp-1695096), band_gap=3.1189999999999998, cbm=5.6616, vbm=2.5426, efermi=4.82537501, spin_polarization=None)}, 's': {'1': DosSummaryData(task_id=MPID(mp-1695096), band_gap=1.4013, cbm=6.2041, vbm=4.8028, efermi=4.82537501, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-1695096), band_gap=3.3902, cbm=5.7972, vbm=2.407, efermi=4.82537501, spin_polarization=None)}, 'p': {'1': DosSummaryData(task_id=MPID(mp-1695096), band_gap=0.45199999999999996, cbm=5.3, vbm=4.848, efermi=4.82537501, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-1695096), band_gap=3.1189999999999998, cbm=5.6616, vbm=2.5426, efermi=4.82537501, spin_polarization=None)}, 'd': {'1': DosSummaryData(task_id=MPID(mp-1695096), band_gap=0.45199999999999996, cbm=5.3, vbm=4.848, efermi=4.82537501, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-1695096), band_gap=3.1189999999999998, cbm=5.6616, vbm=2.5426, efermi=4.82537501, spin_polarization=None)}, 'f': {'1': DosSummaryData(task_id=MPID(mp-1695096), band_gap=90.40559999999999, cbm=47.6098, vbm=-42.7958, efermi=4.82537501, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-1695096), band_gap=90.40559999999999, cbm=47.6098, vbm=-42.7958, efermi=4.82537501, spin_polarization=None)}}, Element U: {'total': {'1': DosSummaryData(task_id=MPID(mp-1695096), band_gap=0.45199999999999996, cbm=5.3, vbm=4.848, efermi=4.82537501, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-1695096), band_gap=3.1189999999999998, cbm=5.6616, vbm=2.5426, efermi=4.82537501, spin_polarization=None)}, 's': {'1': DosSummaryData(task_id=MPID(mp-1695096), band_gap=2.8478, cbm=5.3, vbm=2.4522, efermi=4.82537501, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-1695096), band_gap=3.1642, cbm=5.7068, vbm=2.5426, efermi=4.82537501, spin_polarization=None)}, 'p': {'1': DosSummaryData(task_id=MPID(mp-1695096), band_gap=2.8478, cbm=5.3, vbm=2.4522, efermi=4.82537501, spin_polarization=None), 
'-1': DosSummaryData(task_id=MPID(mp-1695096), band_gap=3.1189999999999998, cbm=5.6616, vbm=2.5426, efermi=4.82537501, spin_polarization=None)}, 'd': {'1': DosSummaryData(task_id=MPID(mp-1695096), band_gap=0.45199999999999996, cbm=5.3, vbm=4.848, efermi=4.82537501, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-1695096), band_gap=3.1189999999999998, cbm=5.6616, vbm=2.5426, efermi=4.82537501, spin_polarization=None)}, 'f': {'1': DosSummaryData(task_id=MPID(mp-1695096), band_gap=0.45199999999999996, cbm=5.3, vbm=4.848, efermi=4.82537501, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-1695096), band_gap=3.1189999999999998, cbm=5.6616, vbm=2.5426, efermi=4.82537501, spin_polarization=None)}}, Element O: {'total': {'1': DosSummaryData(task_id=MPID(mp-1695096), band_gap=0.4971999999999994, cbm=5.3, vbm=4.8028, efermi=4.82537501, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-1695096), band_gap=3.1189999999999998, cbm=5.6616, vbm=2.5426, efermi=4.82537501, spin_polarization=None)}, 's': {'1': DosSummaryData(task_id=MPID(mp-1695096), band_gap=0.4971999999999994, cbm=5.3, vbm=4.8028, efermi=4.82537501, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-1695096), band_gap=3.1189999999999998, cbm=5.6616, vbm=2.5426, efermi=4.82537501, spin_polarization=None)}, 'p': {'1': DosSummaryData(task_id=MPID(mp-1695096), band_gap=0.4971999999999994, cbm=5.3, vbm=4.8028, efermi=4.82537501, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-1695096), band_gap=3.1189999999999998, cbm=5.6616, vbm=2.5426, efermi=4.82537501, spin_polarization=None)}, 'd': {'1': DosSummaryData(task_id=MPID(mp-1695096), band_gap=90.40559999999999, cbm=47.6098, vbm=-42.7958, efermi=4.82537501, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-1695096), band_gap=90.40559999999999, cbm=47.6098, vbm=-42.7958, efermi=4.82537501, spin_polarization=None)}, 'f': {'1': DosSummaryData(task_id=MPID(mp-1695096), band_gap=90.40559999999999, 
cbm=47.6098, vbm=-42.7958, efermi=4.82537501, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-1695096), band_gap=90.40559999999999, cbm=47.6098, vbm=-42.7958, efermi=4.82537501, spin_polarization=None)}}} orbital={'s': {'1': DosSummaryData(task_id=MPID(mp-1695096), band_gap=0.4971999999999994, cbm=5.3, vbm=4.8028, efermi=4.82537501, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-1695096), band_gap=3.1189999999999998, cbm=5.6616, vbm=2.5426, efermi=4.82537501, spin_polarization=None)}, 'p': {'1': DosSummaryData(task_id=MPID(mp-1695096), band_gap=0.45199999999999996, cbm=5.3, vbm=4.848, efermi=4.82537501, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-1695096), band_gap=3.1189999999999998, cbm=5.6616, vbm=2.5426, efermi=4.82537501, spin_polarization=None)}, 'd': {'1': DosSummaryData(task_id=MPID(mp-1695096), band_gap=0.45199999999999996, cbm=5.3, vbm=4.848, efermi=4.82537501, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-1695096), band_gap=3.1189999999999998, cbm=5.6616, vbm=2.5426, efermi=4.82537501, spin_polarization=None)}} magnetic_ordering='FM' +mp-865322 UTlO3 0.27360000000000007 None +mp-753781 EuHfO3 0.4795999999999996 total={'1': DosSummaryData(task_id=MPID(mp-1761084), band_gap=0.5867999999999993, cbm=5.8119, vbm=5.2251, efermi=5.20252597, spin_polarization=1.0), '-1': DosSummaryData(task_id=MPID(mp-1761084), band_gap=3.8365, cbm=5.9924, vbm=2.1559, efermi=5.20252597, spin_polarization=1.0)} elemental={Element Eu: {'total': {'1': DosSummaryData(task_id=MPID(mp-1761084), band_gap=0.5867999999999993, cbm=5.8119, vbm=5.2251, efermi=5.20252597, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-1761084), band_gap=3.8365, cbm=5.9924, vbm=2.1559, efermi=5.20252597, spin_polarization=None)}, 's': {'1': DosSummaryData(task_id=MPID(mp-1761084), band_gap=1.1284, cbm=6.3084, vbm=5.18, efermi=5.20252597, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-1761084), band_gap=4.5588, 
cbm=6.5792, vbm=2.0204, efermi=5.20252597, spin_polarization=None)}, 'p': {'1': DosSummaryData(task_id=MPID(mp-1761084), band_gap=1.1284, cbm=6.3084, vbm=5.18, efermi=5.20252597, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-1761084), band_gap=4.3331, cbm=6.4438, vbm=2.1107, efermi=5.20252597, spin_polarization=None)}, 'd': {'1': DosSummaryData(task_id=MPID(mp-1761084), band_gap=0.5867999999999993, cbm=5.8119, vbm=5.2251, efermi=5.20252597, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-1761084), band_gap=3.8365, cbm=5.9924, vbm=2.1559, efermi=5.20252597, spin_polarization=None)}, 'f': {'1': DosSummaryData(task_id=MPID(mp-1761084), band_gap=0.5867999999999993, cbm=5.8119, vbm=5.2251, efermi=5.20252597, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-1761084), band_gap=3.8815999999999997, cbm=6.0375, vbm=2.1559, efermi=5.20252597, spin_polarization=None)}}, Element Hf: {'total': {'1': DosSummaryData(task_id=MPID(mp-1761084), band_gap=0.5867999999999993, cbm=5.8119, vbm=5.2251, efermi=5.20252597, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-1761084), band_gap=3.8817, cbm=5.9924, vbm=2.1107, efermi=5.20252597, spin_polarization=None)}, 's': {'1': DosSummaryData(task_id=MPID(mp-1761084), band_gap=1.5346000000000002, cbm=6.6243, vbm=5.0897, efermi=5.20252597, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-1761084), band_gap=4.965, cbm=6.7146, vbm=1.7496, efermi=5.20252597, spin_polarization=None)}, 'p': {'1': DosSummaryData(task_id=MPID(mp-1761084), band_gap=1.1284, cbm=6.3084, vbm=5.18, efermi=5.20252597, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-1761084), band_gap=4.1074, cbm=6.2181, vbm=2.1107, efermi=5.20252597, spin_polarization=None)}, 'd': {'1': DosSummaryData(task_id=MPID(mp-1761084), band_gap=0.5415999999999999, cbm=5.7667, vbm=5.2251, efermi=5.20252597, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-1761084), band_gap=3.8817, cbm=5.9924, vbm=2.1107, 
efermi=5.20252597, spin_polarization=None)}, 'f': {'1': DosSummaryData(task_id=MPID(mp-1761084), band_gap=90.2718, cbm=49.9548, vbm=-40.317, efermi=5.20252597, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-1761084), band_gap=90.2718, cbm=49.9548, vbm=-40.317, efermi=5.20252597, spin_polarization=None)}}, Element O: {'total': {'1': DosSummaryData(task_id=MPID(mp-1761084), band_gap=0.6318999999999999, cbm=5.857, vbm=5.2251, efermi=5.20252597, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-1761084), band_gap=3.8815999999999997, cbm=6.0375, vbm=2.1559, efermi=5.20252597, spin_polarization=None)}, 's': {'1': DosSummaryData(task_id=MPID(mp-1761084), band_gap=0.9026999999999994, cbm=6.1278, vbm=5.2251, efermi=5.20252597, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-1761084), band_gap=4.242800000000001, cbm=6.3535, vbm=2.1107, efermi=5.20252597, spin_polarization=None)}, 'p': {'1': DosSummaryData(task_id=MPID(mp-1761084), band_gap=0.6318999999999999, cbm=5.857, vbm=5.2251, efermi=5.20252597, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-1761084), band_gap=3.8815999999999997, cbm=6.0375, vbm=2.1559, efermi=5.20252597, spin_polarization=None)}, 'd': {'1': DosSummaryData(task_id=MPID(mp-1761084), band_gap=90.2718, cbm=49.9548, vbm=-40.317, efermi=5.20252597, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-1761084), band_gap=90.2718, cbm=49.9548, vbm=-40.317, efermi=5.20252597, spin_polarization=None)}, 'f': {'1': DosSummaryData(task_id=MPID(mp-1761084), band_gap=90.2718, cbm=49.9548, vbm=-40.317, efermi=5.20252597, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-1761084), band_gap=90.2718, cbm=49.9548, vbm=-40.317, efermi=5.20252597, spin_polarization=None)}}} orbital={'s': {'1': DosSummaryData(task_id=MPID(mp-1761084), band_gap=1.0832999999999995, cbm=6.3084, vbm=5.2251, efermi=5.20252597, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-1761084), 
band_gap=4.242800000000001, cbm=6.3535, vbm=2.1107, efermi=5.20252597, spin_polarization=None)}, 'p': {'1': DosSummaryData(task_id=MPID(mp-1761084), band_gap=0.6769999999999996, cbm=5.9021, vbm=5.2251, efermi=5.20252597, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-1761084), band_gap=3.8815999999999997, cbm=6.0375, vbm=2.1559, efermi=5.20252597, spin_polarization=None)}, 'd': {'1': DosSummaryData(task_id=MPID(mp-1761084), band_gap=0.5415999999999999, cbm=5.7667, vbm=5.2251, efermi=5.20252597, spin_polarization=None), '-1': DosSummaryData(task_id=MPID(mp-1761084), band_gap=3.8365, cbm=5.9924, vbm=2.1559, efermi=5.20252597, spin_polarization=None)}} magnetic_ordering='FM' + diff --git a/sample/getcif/sample3/find_XSrO3.py b/sample/getcif/sample3/find_XSrO3.py new file mode 100644 index 0000000..3d80cdb --- /dev/null +++ b/sample/getcif/sample3/find_XSrO3.py @@ -0,0 +1,27 @@ +import os,sys +from ruamel.yaml import YAML +from getcif import QueryMaterialsProject +import copy + +import logging +logger = logging.getLogger("main") + +if len(sys.argv) < 2: + print("usage: {} input_file".format(sys.argv[0])) + sys.exit(0) +else: + input_file = sys.argv[1] + +yaml = YAML(typ="safe") +with open(input_file, "r") as fp: + info = yaml.load(fp) + +data = [] +for elem in ["Co", "Ti", "V"]: + info["properties"].update({"elements": [ elem, "Sr" ]}) + data += QueryMaterialsProject(info).run() + +if len(data) > 0: + print([[idx+1, v["material_id"], v["formula"]] for idx, v in enumerate(data)]) + +print("done.") diff --git a/sample/getcif/sample3/input.yaml b/sample/getcif/sample3/input.yaml new file mode 100644 index 0000000..d39f21a --- /dev/null +++ b/sample/getcif/sample3/input.yaml @@ -0,0 +1,17 @@ +database: + target: materials project + +option: + output_dir: result + # dry_run: true + +properties: + formula: "**O3" + # elements: Sr Ti + # elements: Sr V + elements: Sr Co + +fields: | + structure + band_gap + symmetry diff --git 
a/sample/getcif/sample3/run_log.txt b/sample/getcif/sample3/run_log.txt new file mode 100644 index 0000000..ef2b88e --- /dev/null +++ b/sample/getcif/sample3/run_log.txt @@ -0,0 +1,19 @@ +material_id formula band_gap symmetry +mp-1283849 SrCoO3 0.0 crystal_system= symbol='P4/mbm' number=127 point_group='4/mmm' symprec=0.1 version='2.0.2' +mp-505766 SrCoO3 0.0 crystal_system= symbol='Pm-3m' number=221 point_group='m-3m' symprec=0.1 version='2.0.2' +mp-1273854 SrCoO3 0.0 crystal_system= symbol='P4/mmm' number=123 point_group='4/mmm' symprec=0.1 version='2.0.2' +mp-1272815 SrCoO3 0.0 crystal_system= symbol='Amm2' number=38 point_group='mm2' symprec=0.1 version='2.0.2' +mp-562466 SrCoO3 0.0 crystal_system= symbol='P-1' number=2 point_group='-1' symprec=0.1 version='2.0.2' +mp-550781 SrCoO3 0.0 crystal_system= symbol='P2_1/m' number=11 point_group='2/m' symprec=0.1 version='2.0.2' + +material_id formula band_gap symmetry +mp-5229 SrTiO3 1.7719999999999998 crystal_system= symbol='Pm-3m' number=221 point_group='m-3m' symprec=0.1 version='2.0.2' +mp-4651 SrTiO3 1.8487999999999998 crystal_system= symbol='I4/mcm' number=140 point_group='4/mmm' symprec=0.1 version='2.0.2' +mp-551830 SrTiO3 1.7869999999999995 crystal_system= symbol='I4/mcm' number=140 point_group='4/mmm' symprec=0.1 version='2.0.2' +mp-776018 SrTiO3 1.7362000000000002 crystal_system= symbol='P6_3/mmc' number=194 point_group='6/mmm' symprec=0.1 version='2.0.2' + +material_id formula band_gap symmetry +mp-18717 SrVO3 0.0 crystal_system= symbol='Pm-3m' number=221 point_group='m-3m' symprec=0.1 version='2.0.2' + +[[1, 'mp-1283849', 'SrCoO3'], [2, 'mp-505766', 'SrCoO3'], [3, 'mp-1273854', 'SrCoO3'], [4, 'mp-1272815', 'SrCoO3'], [5, 'mp-562466', 'SrCoO3'], [6, 'mp-550781', 'SrCoO3'], [7, 'mp-5229', 'SrTiO3'], [8, 'mp-4651', 'SrTiO3'], [9, 'mp-551830', 'SrTiO3'], [10, 'mp-776018', 'SrTiO3'], [11, 'mp-18717', 'SrVO3']] +done. 
diff --git a/src/getcif/__init__.py b/src/getcif/__init__.py new file mode 100644 index 0000000..c6c636c --- /dev/null +++ b/src/getcif/__init__.py @@ -0,0 +1,10 @@ +import os +from importlib.metadata import PackageNotFoundError, version + +from .main import QueryMaterialsProject + +try: + __version__ = version("htp-tools-getcif") +except PackageNotFoundError: + # package not installed + pass diff --git a/src/getcif/main.py b/src/getcif/main.py new file mode 100644 index 0000000..51da255 --- /dev/null +++ b/src/getcif/main.py @@ -0,0 +1,487 @@ +#!/usr/bin/env python3 + +import os +import sys +import copy +import io +import csv +import json +import argparse +import logging + +from importlib.metadata import PackageNotFoundError, version +from pathlib import Path +from ruamel.yaml import YAML, YAMLError +from mp_api.client import MPRester + +# enum parameters +from emmet.core.symmetry import CrystalSystem +from emmet.core.summary import HasProps +from pymatgen.analysis.magnetism import Ordering + +logger = logging.getLogger("getcif") + +try: + __version__ = version("htp-tools-getcif") +except PackageNotFoundError: + __version__ = "develop" + +class QueryMaterialsProject: + query_table = { + "band_gap": "tuple[float,float]", + "chemsys": "str|list[str]", + "crystal_system": "CrystalSystem", + "density": "tuple[float,float]", + "deprecated": "bool", + "e_electronic": "tuple[float,float]", + "e_ionic": "tuple[float,float]", + "e_total": "tuple[float,float]", + "efermi": "tuple[float,float]", + "elastic_anisotropy": "tuple[float,float]", + "elements": "list[str]", + "energy_above_hull": "tuple[float,float]", + "equilibrium_reaction_energy": "tuple[float,float]", + "exclude_elements": "list[str]", + "formation_energy": "tuple[float,float]", + "formula": "str|list[str]", + "g_reuss": "tuple[float,float]", + "g_voigt": "tuple[float,float]", + "g_vrh": "tuple[float,float]", + "has_props": "list[HasProps]", + "has_reconstructed": "bool", + "is_gap_direct": "bool", + 
"is_metal": "bool", + "is_stable": "bool", + "k_reuss": "tuple[float,float]", + "k_voigt": "tuple[float,float]", + "k_vrh": "tuple[float,float]", + "magnetic_ordering": "Ordering", + "material_ids": "list[str]", + "n": "tuple[float,float]", + "num_elements": "tuple[int,int]", + "num_sites": "tuple[int,int]", + "num_magnetic_sites": "tuple[int,int]", + "num_unique_magnetic_sites": "tuple[int,int]", + "piezoelectric_modulus": "tuple[float,float]", + "poisson_ratio": "tuple[float,float]", + "possible_species": "list[str]", + "shape_factor": "tuple[float,float]", + "spacegroup_number": "int", + "spacegroup_symbol": "str", + "surface_energy_anisotropy": "tuple[float,float]", + "theoretical": "bool", + "total_energy": "tuple[float,float]", + "total_magnetization": "tuple[float,float]", + "total_magnetization_normalized_formula_units": "tuple[float,float]", + "total_magnetization_normalized_vol": "tuple[float,float]", + "uncorrected_energy": "tuple[float,float]", + "volume": "tuple[float,float]", + "weighted_surface_energy": "tuple[float,float]", + "weighted_work_function": "tuple[float,float]", + } + + # mpr.materials.summary.available_fields + available_fields = [ + "band_gap", + "bandstructure", + "builder_meta", + "bulk_modulus", + "cbm", + "chemsys", + "composition", + "composition_reduced", + "database_IDs", + "decomposes_to", + "density", + "density_atomic", + "deprecated", + "deprecation_reasons", + "dos", + "dos_energy_down", + "dos_energy_up", + "e_electronic", + "e_ij_max", + "e_ionic", + "e_total", + "efermi", + "elements", + "energy_above_hull", + "energy_per_atom", + "equilibrium_reaction_energy_per_atom", + "es_source_calc_id", + "formation_energy_per_atom", + "formula_anonymous", + "formula_pretty", + "grain_boundaries", + "has_props", + "has_reconstructed", + "homogeneous_poisson", + "is_gap_direct", + "is_magnetic", + "is_metal", + "is_stable", + "last_updated", + "material_id", + "n", + "nelements", + "nsites", + "num_magnetic_sites", + 
"num_unique_magnetic_sites", + "ordering", + "origins", + "possible_species", + "property_name", + "shape_factor", + "shear_modulus", + "structure", + "surface_anisotropy", + "symmetry", + "task_ids", + "theoretical", + "total_magnetization", + "total_magnetization_normalized_formula_units", + "total_magnetization_normalized_vol", + "types_of_magnetic_species", + "uncorrected_energy_per_atom", + "universal_anisotropy", + "vbm", + "volume", + "warnings", + "weighted_surface_energy", + "weighted_surface_energy_EV_PER_ANG2", + "weighted_work_function", + "xas", + ] + + def __init__(self, info): + self.info = copy.deepcopy(info) + self._setup_dbinfo(self.info.get("database", {})) + self._setup_option(self.info.get("option", {})) + + def _setup_dbinfo(self, info): + self.dbinfo = info + + # setup api key + # 1. read from api_key_file (default "materials_project.key") if exists + # 2. taken from environment variable or pymetgen settings (leave api_key None) + + api_key = None + + api_key_file = info.get("api_key_file", "materials_project.key") + if api_key_file.endswith(".key") and Path(api_key_file).exists(): + with open(Path(api_key_file), "r", encoding="utf-8") as fp: + data = [s.strip() for s in fp.readlines() if not s.strip().startswith("#")] + if data: + api_key = data[0] + if not api_key: + logger.debug("api_key not set. 
def _setup_option(self, info):
    """Store the run-time options taken from the ``option`` section.

    Args:
        info: mapping with the optional keys ``output_dir``, ``dry_run``
            and ``symprec``.  ``None`` (a section that is present but
            empty in the input file) is treated as an empty mapping.

    Sets ``self.output_dir`` (default ``""``), ``self.dry_run`` (default
    ``False``) and ``self.symprec`` (default ``0.1``).
    """
    info = info or {}
    self.output_dir = info.get("output_dir", "")
    self.dry_run = info.get("dry_run", False)
    # symprec: default value 0.1 used in Materials Project to determine symmetry
    symprec = info.get("symprec", 0.1)
    # 0 switches the tolerance off; it is stored as None
    self.symprec = None if symprec == 0 else symprec


def _find_query(self, info):
    """Build the keyword arguments of the search from the input parameters.

    Args:
        info: mapping holding the optional entries ``properties`` (search
            conditions) and ``fields`` (names of the data to retrieve).

    Returns:
        A new dict made of the converted conditions plus the entry
        ``"fields"``.  ``info`` itself is left untouched so that the query
        can be built again from the same input.

    Raises:
        ValueError: if a condition or a field name is not valid.
    """
    prop_spec = info.get("properties")
    field_spec = info.get("fields")

    # an entry that is present but empty is read as None
    props = self._find_properties({} if prop_spec is None else prop_spec)
    fields = self._find_fields("" if field_spec is None else field_spec)

    query = dict(props)
    query["fields"] = fields
    return query


def _find_fields(self, info):
    """Return the list of field names to retrieve.

    Args:
        info: field names, either as one blank-separated string or as a
            list of strings.

    Returns:
        A new list of field names.  ``material_id`` and ``formula_pretty``
        are appended when they are not requested explicitly.

    Raises:
        ValueError: if ``info`` is neither a string nor a list, or if a
            name is not listed in ``QueryMaterialsProject.available_fields``.
    """
    if isinstance(info, str):
        fields = info.split()
    elif isinstance(info, list):
        # copy, so that the caller's list is not extended below
        fields = list(info)
    else:
        raise ValueError("invalid fields parameter")

    for required in ("material_id", "formula_pretty"):
        if required not in fields:
            fields.append(required)

    # check: report every unknown name before giving up
    unknown = [
        field for field in fields
        if field not in QueryMaterialsProject.available_fields
    ]
    for field in unknown:
        logger.error("unknown field name {}".format(field))
    if unknown:
        logger.error("fields check failed")
        raise ValueError("fields check failed")

    logger.info("query: fields={}".format(fields))
    return fields


def _find_properties(self, info):
    """Check the search conditions and convert their values.

    Each key is looked up in ``QueryMaterialsProject.query_table``; the type
    name found there selects the conversion:

    - ``bool``, ``int``, ``str``: value passed through unchanged
    - ``list[str]``: a string is split at blanks
    - ``str|list[str]``: a string is split at blanks; a single word stays
      a string, several words become a list
    - ``tuple[int,int]``, ``tuple[float,float]``: a list gives its first two
      items (``None`` or ``"none"`` for an open bound); a string is parsed
      as a range expression
    - ``list[HasProps]``, ``CrystalSystem``, ``Ordering``: names are
      replaced by the members of the corresponding enumeration

    Args:
        info: mapping of query key to value as written in the input file.

    Returns:
        A new dict with the converted values; ``info`` is not modified.

    Raises:
        ValueError: if a key is unknown or a range expression is malformed.
        KeyError: if a name is not a member of its enumeration.
    """
    # Convert into a copy: the input mapping is a part of self.info and
    # has to stay as the user wrote it.
    props = dict(info)

    def _find_val_or_none(val, typ=float):
        # None (null in the input file) and the word "none" are open bounds
        if val is None:
            return None
        if isinstance(val, str) and val.lower() == "none":
            return None
        return typ(val)

    def _find_range(val, typ=float):
        """
        Convert a range expression to a tuple (min, max); None stands for
        an open bound.

        accepted patterns (the blank around an operator may be omitted)
          keyword: < 1.0
          keyword: <= 1.0
          keyword: > 0.5
          keyword: >= 0.5
          keyword: 0.5 ~ 1.0
          keyword: 0.5 1.0
          keyword: = 0.5
        not accepted
          keyword: 1.0 <  #NG
        """
        text = val.strip()

        # two-character operators first, so that "<=" is not taken for "<"
        for op in ("<=", ">=", "<", ">", "="):
            if not text.startswith(op):
                continue
            operand = text[len(op):].split()
            if len(operand) != 1:
                raise ValueError("illegal string: {}".format(val))
            bound = typ(operand[0])
            if op == "=":
                return (bound, bound)
            if typ is int and op in ("<", ">"):
                # strict inequality on integers: use the adjacent integer
                bound += 1 if op == ">" else -1
            return (None, bound) if op.startswith("<") else (bound, None)

        if "~" in text:
            low, _, high = text.partition("~")
            return (typ(low), typ(high))

        words = text.split()
        if len(words) == 2:
            return (typ(words[0]), typ(words[1]))
        raise ValueError("illegal string: {}".format(val))

    # format and check
    err = 0
    for prop, value in info.items():
        typ = QueryMaterialsProject.query_table.get(prop, None)
        if typ is None:
            logger.error("unknown query key {}".format(prop))
            err += 1
        elif typ in ("bool", "int", "str"):
            pass
        elif typ == "list[str]":
            if isinstance(value, str):
                props[prop] = value.split()
        elif typ == "str|list[str]":
            # lists and empty strings are passed through as they are
            if isinstance(value, str):
                words = value.split()
                if words:
                    props[prop] = words if len(words) > 1 else words[0]
        elif typ == "tuple[int,int]":
            if isinstance(value, list):
                props[prop] = tuple(_find_val_or_none(s, int) for s in value[0:2])
            elif isinstance(value, str):
                props[prop] = _find_range(value, int)
        elif typ == "tuple[float,float]":
            if isinstance(value, list):
                props[prop] = tuple(_find_val_or_none(s) for s in value[0:2])
            elif isinstance(value, str):
                props[prop] = _find_range(value)
        elif typ == "list[HasProps]":  # for has_props
            if isinstance(value, list):
                props[prop] = [HasProps[v.lower()] for v in value]
            elif isinstance(value, str):
                props[prop] = [HasProps[v.lower()] for v in value.split()]
        elif typ == "CrystalSystem":  # for crystal_system
            props[prop] = CrystalSystem[value.capitalize()]
        elif typ == "Ordering":  # for magnetic_ordering
            props[prop] = Ordering[value]
        else:
            logger.error("unknown query type {} for key {}".format(typ, prop))
        logger.debug("prop={}, value={}, type={} -> {}".format(prop,value,typ,props[prop]))

    if err > 0:
        logger.error("properties check failed")
        raise ValueError("properties check failed")

    logger.info("query: properties={}".format(props))
    return props
def _do_summary(self, docs, fields):
    """Write the requested data of every document to files and collect them.

    For each document the directory ``<self.output_dir>/<material_id>`` is
    created.  ``structure`` is written to ``structure.cif`` (with
    ``symprec=self.symprec`` unless that is ``None``), ``formula_pretty`` to
    the file ``formula``, and every other field to a file named after the
    field, as ``str(value)`` followed by a newline.

    Args:
        docs: documents returned by the search.
        fields: names of the fields to export.

    Returns:
        One dict per document with the keys ``material_id`` and ``formula``
        followed by the exported fields (``structure`` is not included).

    Raises:
        KeyError: if a document does not provide a requested field.
    """
    results = []
    cif_options = {} if self.symprec is None else {"symprec": self.symprec}

    for doc in docs:
        m_id = str(doc.material_id)
        values = dict(doc)
        data = {"material_id": m_id, "formula": doc.formula_pretty}

        data_dir = Path(self.output_dir, m_id)
        os.makedirs(data_dir, exist_ok=True)

        for field in fields:
            if field == "material_id":
                continue
            if field == "structure":
                doc.structure.to(Path(data_dir, "structure.cif"), fmt="cif", **cif_options)
                continue

            # look the value up first: a missing field must not leave an
            # empty file behind
            value = values[field]
            file_name = "formula" if field == "formula_pretty" else field
            with open(Path(data_dir, file_name), "w", encoding="utf-8") as fp:
                fp.write(str(value) + "\n")
            # the formula is already stored under the key "formula"
            if field != "formula_pretty":
                data[field] = value

        results.append(data)
    return results


def _do_report(self, results, output_file=None, fmt="text"):
    """Print the collected results, or write them to a file.

    Args:
        results: list of dicts as returned by ``_do_summary``; the keys of
            the first entry define the columns.
        output_file: path of the report file; ``None`` prints the report.
        fmt: ``"text"`` (blank-separated columns), ``"csv"`` or ``"json"``.
            Any other value is logged as an error and nothing is written.

    Returns:
        None.  Nothing is written when ``results`` is empty.
    """
    if not results:
        return None

    columns = list(results[0].keys())

    if fmt == "text":
        lines = [" ".join(columns)]
        lines.extend(" ".join(str(row[col]) for col in columns) for row in results)
        retv = "\n".join(lines) + "\n"
    elif fmt == "csv":
        buffer = io.StringIO()
        writer = csv.DictWriter(buffer, fieldnames=columns)
        writer.writeheader()
        writer.writerows(results)
        retv = buffer.getvalue()
    elif fmt == "json":
        # Values that JSON cannot represent are written as str(value), the
        # same representation the text and csv reports use.
        retv = json.dumps(results, default=str)
    else:
        logger.error("unknown format {}".format(fmt))
        return None

    if output_file is None:
        print(retv)
    else:
        # csv output already carries its own line terminators; newline=""
        # keeps the file layer from translating them again
        newline = "" if fmt == "csv" else None
        with open(output_file, "w", encoding="utf-8", newline=newline) as fp:
            fp.write(retv)
    return None


def run(self, output_file=None, fmt="text"):
    """Build the query, run the search, export the data and report.

    Args:
        output_file: path of the report file; ``None`` prints the report.
        fmt: report format passed on to ``_do_report``.

    Returns:
        The list of result dicts produced by ``_do_summary``.
    """
    query = self._find_query(self.info)
    docs = self._do_query(query)
    results = self._do_summary(docs, query["fields"])
    self._do_report(results, output_file, fmt)
    return results


@classmethod
def show_info(cls, file=None):
    """Write the accepted query keys (with their types) and field names.

    Args:
        file: writable text stream; ``sys.stdout`` when ``None``.
    """
    if file is None:
        file = sys.stdout
    file.write("Query parameters:\n")
    file.write(" Properties:\n")
    for key, type_name in cls.query_table.items():
        file.write(" {:24s} {}\n".format(key, type_name))
    file.write("\n")
    file.write(" Fields:\n")
    for key in cls.available_fields:
        file.write(" {}\n".format(key))


# --- module level: command-line entry point (not part of the class) ---

def main():
    """Command-line entry point of getcif.

    Parses the arguments, sets the log level from ``-v``/``-q``, reads the
    YAML input file, forces ``option.dry_run`` when ``--dry-run`` is given,
    and runs the query.  Exits with status 1 when the input file cannot be
    read, is not valid YAML, is empty, or is not a mapping.
    """

    class ArgumentParser(argparse.ArgumentParser):
        # append the list of query keys and fields to the standard help
        def print_help(self, file=None):
            if file is None:
                file = sys.stdout
            super().print_help(file)
            file.write("\n")
            QueryMaterialsProject.show_info(file)

    parser = ArgumentParser(prog="getcif")
    parser.add_argument("input_file", action="store",
                        help="input parameter file (input.yaml)")
    parser.add_argument("-v", "--verbose", action="count", default=0,
                        help="increase output verbosity")
    parser.add_argument("-q", "--quiet", action="count", default=0,
                        help="decrease output verbosity")
    parser.add_argument("--dry-run", action="store_true", default=False,
                        help="dry run")
    parser.add_argument("--version", action="version",
                        version="%(prog)s version {}".format(__version__))

    args = parser.parse_args()

    # each -v lowers the threshold by one level, each -q raises it
    logging.basicConfig(level=logging.WARNING - (args.verbose - args.quiet) * 10)

    try:
        yaml = YAML(typ="safe")
        with open(Path(args.input_file), mode="r", encoding="utf-8") as fp:
            info_dict = yaml.load(fp)
    except (OSError, YAMLError) as e:
        # OSError covers a missing file as well as a directory or an
        # unreadable file
        logger.error("{}".format(e))
        sys.exit(1)

    if info_dict is None:
        logger.error("input file is empty")
        sys.exit(1)
    if not isinstance(info_dict, dict):
        logger.error("input file must contain a mapping")
        sys.exit(1)

    if args.dry_run:
        # "option:" without content is read as None
        option = info_dict.get("option") or {}
        option["dry_run"] = True
        info_dict["option"] = option

    # errors raised by the query are not caught here and end the program
    # with a traceback
    QueryMaterialsProject(info_dict).run()


if __name__ == "__main__":
    main()