diff --git a/Model_Approval.ipynb b/Model_Approval.ipynb new file mode 100644 index 0000000..6265dd0 --- /dev/null +++ b/Model_Approval.ipynb @@ -0,0 +1,2328 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "443ec0a7-722d-496c-83a6-1dbb18c9961c", + "metadata": {}, + "source": [ + "# Imports " + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "ca8782f5-8c0d-4c8a-88d2-d83ada30c1a6", + "metadata": {}, + "outputs": [], + "source": [ + "import glob\n", + "import cv2 as cv\n", + "from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, ConfusionMatrixDisplay\n", + "import os\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "from heatmap import heatmap, corrplot\n", + "\n", + "from sklearn.model_selection import RandomizedSearchCV, train_test_split\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "from sklearn.cluster import KMeans\n", + "import pickle\n", + "\n", + "### PoreAnalyzer Package ################################################################################\n", + "\n", + "from analyzer.classificator import Preprocessing, DimensionReductionPCA, KMeansClassifier, DBSCANClassifier\n", + "from analyzer.features import Pore" + ] + }, + { + "cell_type": "markdown", + "id": "feb34a60-a784-4f0b-b8f5-f2c182b34002", + "metadata": { + "tags": [] + }, + "source": [ + "# Pore separation and Feature Extraction" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "257e0668-9382-474f-b341-5a2a04ac0f7d", + "metadata": {}, + "outputs": [], + "source": [ + "# Poren einzeln aus Bildern auslesen und speichern\n", + "images = [filepath for filepath in glob.iglob('Images/Ti6Al4V-Stichprobe_4/Images/*.jpg')]\n", + "process_time = []\n", + "\n", + "# lists for extracted features\n", + "solidity = []\n", + "area = []\n", + "defect_density = []\n", + "perimeter = []\n", + "mean_conv_defect = []\n", + "img_names = []\n", + "pore_number = 
[]\n", + "images_index = []\n", + "pore_index = []\n", + "\n", + "# try: \n", + "# os.makedirs('Images/AlSi35_Analysis/all_pores')\n", + "# except OSError:\n", + "# pass\n", + "\n", + "for i, image in enumerate(images):\n", + " \n", + " # if i == 2:\n", + " # break\n", + " image_saved = False\n", + "\n", + " time_zero = time.time() # Zeit zum Beginn des Loops speichern\n", + " \n", + " img = cv.imread(image, cv.IMREAD_UNCHANGED) # Bild laden\n", + " head_tail = os.path.split(image)\n", + " names_split = int(os.path.splitext(head_tail[1])[0].removesuffix('_4')) \n", + " \n", + " try: \n", + " specimen=Micrograph(img, scale=1.79, cropsize_microns=2000)\n", + " except:\n", + " continue\n", + " \n", + " for j, pore in enumerate(specimen.prs):\n", + " pore_sep = PoreSeperator(pore.contour, specimen.img_cnt, segmentsize=0.1)\n", + " pore_index.append(str(i) + '_' + str(j))\n", + " if pore_sep.check_size() == True:\n", + " pore_sep.save('Images/Ti6Al4V-Stichprobe_4/pores/{}_{}.jpg'.format(names_split, j))#i,j))\n", + " image_saved = True\n", + " \n", + " # save features of saved pore in list\n", + " solidity.append(pore.solidity)\n", + " area.append(pore.area)\n", + " defect_density.append(pore.defect_density)\n", + " perimeter.append(pore.perimeter)\n", + " mean_conv_defect.append(pore.mean_defect)\n", + " img_names.append(names_split)\n", + " pore_number.append(j)\n", + " images_index.append(str(i) + '_' + str(j))\n", + " \n", + " process_time.append(time.time()-time_zero)\n", + " time_pending = (len(images)-i-1)*sum(process_time)/len(process_time)\n", + " pending_hours = int(time_pending*0.01666666/60)\n", + " pending_minutes = int(time_pending*0.01666666-(60*pending_hours))\n", + " print('\\r Image {:03d} of {}, time pending: {:02d} h {:02d} Min'.format(i+1, len(images), pending_hours, pending_minutes), end=\"\") " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b2dd5fd5-731b-413b-82b7-71a501ac3ef5", + "metadata": {}, + "outputs": [], + 
"source": [ + "dataframe = {'img': img_names, 'pore_number': pore_number, 'image_index': images_index,'solidity': solidity, 'area': area, 'defect_density': defect_density, 'perimeter': perimeter, 'mean_defect': mean_conv_defect}\n", + "\n", + "dataframe = pd.DataFrame.from_dict(dataframe)\n", + "dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4f4da05d-06b1-4c03-b57d-061fa99adcd4", + "metadata": {}, + "outputs": [], + "source": [ + "dataframe.to_csv('Images/AlSi10Mg_Pores/AlSi10Mg_Pore_Features.csv', sep=';', decimal=\",\", index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "d8155d5c-52b1-4407-8f3f-28bb1d4a38b7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
imgpore_numberimage_indexsolidityareadefect_densityperimetermean_defect
01100_101.0000002.6477310.0000006.1789820.000000
11110_110.96703313.70590014.59225614.5904430.301292
21120_120.967553487.6496772.46078387.1363940.506548
31130_130.96153870.0869878.56079032.5296060.340241
41140_140.974747420.8334174.27722778.0710540.381112
...........................
9006198657634_6570.788834374.1087591.871114102.7869822.095467
9006298658634_6581.0000000.6229950.0000003.1572020.000000
9006398659634_6590.9655174.36096822.9306898.0845220.249599
9006498660634_6600.89873411.05816936.17235415.8421060.290443
9006598666634_6660.901754120.0823708.32761747.7178330.590924
\n", + "

90066 rows × 8 columns

\n", + "
" + ], + "text/plain": [ + " img pore_number image_index solidity area defect_density \\\n", + "0 1 10 0_10 1.000000 2.647731 0.000000 \n", + "1 1 11 0_11 0.967033 13.705900 14.592256 \n", + "2 1 12 0_12 0.967553 487.649677 2.460783 \n", + "3 1 13 0_13 0.961538 70.086987 8.560790 \n", + "4 1 14 0_14 0.974747 420.833417 4.277227 \n", + "... ... ... ... ... ... ... \n", + "90061 98 657 634_657 0.788834 374.108759 1.871114 \n", + "90062 98 658 634_658 1.000000 0.622995 0.000000 \n", + "90063 98 659 634_659 0.965517 4.360968 22.930689 \n", + "90064 98 660 634_660 0.898734 11.058169 36.172354 \n", + "90065 98 666 634_666 0.901754 120.082370 8.327617 \n", + "\n", + " perimeter mean_defect \n", + "0 6.178982 0.000000 \n", + "1 14.590443 0.301292 \n", + "2 87.136394 0.506548 \n", + "3 32.529606 0.340241 \n", + "4 78.071054 0.381112 \n", + "... ... ... \n", + "90061 102.786982 2.095467 \n", + "90062 3.157202 0.000000 \n", + "90063 8.084522 0.249599 \n", + "90064 15.842106 0.290443 \n", + "90065 47.717833 0.590924 \n", + "\n", + "[90066 rows x 8 columns]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataframe = pd.read_csv('Images/Ti6Al4V_Analysis/Ti6Al4V_Pore_Features.csv', sep=';', decimal=\",\")\n", + "dataframe" + ] + }, + { + "cell_type": "markdown", + "id": "7838f6b5-6e2e-4c27-bea8-82a5202a3348", + "metadata": { + "tags": [] + }, + "source": [ + "# Import Labeled Data" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "7f652c2d-79d0-4206-a64a-2251a0c767d3", + "metadata": {}, + "outputs": [], + "source": [ + "# loading image names for the pores in the label folders\n", + "images_kh = [filepath for filepath in glob.iglob('Images\\Ti6Al4V_Analysis\\labeled\\Keyhole\\*.jpg')]\n", + "images_lof = [filepath for filepath in glob.iglob('Images\\Ti6Al4V_Analysis\\labeled\\Lack_of_Fusion\\*.jpg')]\n", + "images_process = [filepath for filepath in 
glob.iglob('Images\\Ti6Al4V_Analysis\\labeled\\Process\\*.jpg')]" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "b7e21dde-ff64-48de-b661-b86e91bea65e", + "metadata": {}, + "outputs": [], + "source": [ + "# loading images and label number \n", + "img = []\n", + "labels = []\n", + "for image in images_kh:\n", + " img.append(cv.resize(cv.imread(image, cv.IMREAD_GRAYSCALE), dsize=(100, 100), interpolation=cv.INTER_CUBIC))\n", + " # img.append(cv.imread(image, cv.IMREAD_GRAYSCALE))\n", + " labels.append(0)\n", + " \n", + "for image in images_lof:\n", + " img.append(cv.resize(cv.imread(image, cv.IMREAD_GRAYSCALE), dsize=(100, 100), interpolation=cv.INTER_CUBIC))\n", + " # img.append(cv.imread(image, cv.IMREAD_GRAYSCALE))\n", + " labels.append(1)\n", + " \n", + "for image in images_process:\n", + " img.append(cv.resize(cv.imread(image, cv.IMREAD_GRAYSCALE), dsize=(100, 100), interpolation=cv.INTER_CUBIC))\n", + " # img.append(cv.imread(image, cv.IMREAD_GRAYSCALE))\n", + " labels.append(2)\n", + " \n", + "images = images_kh + images_lof + images_process" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "75beeebc-f8ef-456a-b320-4d9e66dba0ad", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1200" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(img)" + ] + }, + { + "cell_type": "markdown", + "id": "5f0cbcdc-6e38-4800-81ab-011b9aad9cf6", + "metadata": { + "tags": [] + }, + "source": [ + "## Pixelfeatures" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "2c373b22-2f10-4eb0-845e-136e81b86bbd", + "metadata": {}, + "outputs": [], + "source": [ + "# flattening images into one long vector\n", + "data_px = Preprocessing(img)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "0668b012-ed59-4d96-9e56-733c3b99c0c9", + "metadata": {}, + "outputs": [], + "source": [ + 
"data_px.dataframe.to_csv('Images/Ti6Al4V_Analysis/Ti6Al4V_Pore_PX_flattened.csv', sep=';', decimal=\",\", index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "86333f14-8bef-4497-8ddc-5b63480ccec8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Varianz explained by PCA model with 250 Components is 96.888 %.\n" + ] + } + ], + "source": [ + "# linear dimension reduction to 250 principal components\n", + "data_red = DimensionReductionPCA(data_px.dataframe, k=250)\n", + "data_red.pca_explain()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "5f12a260-53aa-4448-b611-42e21daed87a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "PCA Model saved as Models/PCA_Model_Binary_RF.pickle\n" + ] + } + ], + "source": [ + "data_red.save_pca_model(name='Models/PCA_Model_Binary_RF')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5d03273-0888-4267-bd84-79456a713127", + "metadata": {}, + "outputs": [], + "source": [ + "# scaling data if wanted\n", + "data_red = data_red.dataframe #.scale()" + ] + }, + { + "cell_type": "markdown", + "id": "b8a2e6e9-2288-4106-a0a7-82c208dc957d", + "metadata": { + "tags": [] + }, + "source": [ + "## Local Features" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "65c40857-6671-40b3-88c6-a50bef1d840e", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\altmann\\AppData\\Local\\Temp\\ipykernel_7948\\4138243829.py:21: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " local_features['label'] = labels\n" + ] + }, + { + "data": { + 
"text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
imgpore_numberimage_indexsolidityareadefect_densityperimetermean_defectlabel
01100_101.0000002.6477310.0000006.1789820.0000000
11110_110.96703313.70590014.59225614.5904430.3012920
51150_150.9830102469.0866611.579531192.7304050.4486640
61170_170.96402920.87034719.16594917.2852840.2754460
101220_221.0000005.1397120.0000008.4114610.0000000
..............................
9005598651634_6510.8484854.36096845.8613779.2007620.3795392
9006198657634_6570.788834374.1087591.871114102.7869822.0954672
9006398659634_6590.9655174.36096822.9306898.0845220.2495992
9006498660634_6600.89873411.05816936.17235415.8421060.2904432
9006598666634_6660.901754120.0823708.32761747.7178330.5909242
\n", + "

1200 rows × 9 columns

\n", + "
" + ], + "text/plain": [ + " img pore_number image_index solidity area defect_density \\\n", + "0 1 10 0_10 1.000000 2.647731 0.000000 \n", + "1 1 11 0_11 0.967033 13.705900 14.592256 \n", + "5 1 15 0_15 0.983010 2469.086661 1.579531 \n", + "6 1 17 0_17 0.964029 20.870347 19.165949 \n", + "10 1 22 0_22 1.000000 5.139712 0.000000 \n", + "... ... ... ... ... ... ... \n", + "90055 98 651 634_651 0.848485 4.360968 45.861377 \n", + "90061 98 657 634_657 0.788834 374.108759 1.871114 \n", + "90063 98 659 634_659 0.965517 4.360968 22.930689 \n", + "90064 98 660 634_660 0.898734 11.058169 36.172354 \n", + "90065 98 666 634_666 0.901754 120.082370 8.327617 \n", + "\n", + " perimeter mean_defect label \n", + "0 6.178982 0.000000 0 \n", + "1 14.590443 0.301292 0 \n", + "5 192.730405 0.448664 0 \n", + "6 17.285284 0.275446 0 \n", + "10 8.411461 0.000000 0 \n", + "... ... ... ... \n", + "90055 9.200762 0.379539 2 \n", + "90061 102.786982 2.095467 2 \n", + "90063 8.084522 0.249599 2 \n", + "90064 15.842106 0.290443 2 \n", + "90065 47.717833 0.590924 2 \n", + "\n", + "[1200 rows x 9 columns]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# all features are contained in dataframe --> extracted features of all pores in dataset \n", + "\n", + "# slicing name of image\n", + "names_kh = [os.path.splitext(os.path.split(image)[1])[0] for image in images_kh]\n", + "names_lof = [os.path.splitext(os.path.split(image)[1])[0] for image in images_lof]\n", + "names_process = [os.path.splitext(os.path.split(image)[1])[0] for image in images_process]\n", + "\n", + "# combining lists\n", + "labeled_pores = names_kh + names_lof + names_process\n", + "\n", + "# selecting rows by combined list\n", + "local_features = dataframe[dataframe['image_index'].isin(labeled_pores)]\n", + "\n", + "# adding label to dataframe\n", + "key_hole = [0 for i in range(len(names_kh))]\n", + "lof = [1 for i in range(len(names_lof))]\n", + "process = [2 for 
i in range(len(names_process))]\n", + "\n", + "labels = key_hole + lof + process\n", + " \n", + "local_features['label'] = labels\n", + "\n", + "local_features\n", + "\n", + "# local_features.to_csv('Images/Ti6Al4V_Analysis/Ti6Al4V_local_defect_features.csv', sep=';', decimal=\",\", index=False)" + ] + }, + { + "cell_type": "markdown", + "id": "5e8fc9f5-65f1-4e71-9340-21c75d002d90", + "metadata": { + "tags": [] + }, + "source": [ + "# Unsupervised Models" + ] + }, + { + "cell_type": "markdown", + "id": "e3b83ea5-e8f1-4afd-8193-a2527b2282e4", + "metadata": { + "tags": [] + }, + "source": [ + "## kMeans" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "c31d8534-843c-42c2-aa34-c46d63fcbb6f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
pca0pca1pca2pca3pca4pca5pca6pca7pca8pca9...pca241pca242pca243pca244pca245pca246pca247pca248pca249labels
0255.425598-1785.702271925.004456-647.859558558.293701-352.141418-539.285645-457.65905881.950119-70.469582...-14.26652347.72750124.730694-25.18042940.02402516.89291853.906643-17.68145430.3394760
1803.354980-2022.260864837.281555433.116974-408.251678193.452789582.161194264.769287-683.036194174.103912...-2.07882135.56679515.871997-34.090492-36.830696-15.575199-51.617443-81.089500-49.1718600
22319.719727-2092.848877-420.375305-251.272079512.9344481146.280273-162.358948-497.942871-366.30847247.767113...-57.140324-55.58275657.23656590.583664-1.16124614.53749742.300175-20.864361-2.1664840
3-1407.126343-684.353210968.24578923.67079932.504410-1071.493164143.97363379.708427646.462952-257.145782...64.606071-6.576953-96.6572803.912672-26.86332326.66540346.656254-2.38064248.2282520
4-963.219238-1100.2916261195.094727-167.95707782.295097-1099.581055-66.005585140.388641540.216675-121.797462...25.663364-57.5227285.5483590.37056427.228884-36.064571-23.228437-37.61468167.6368940
..................................................................
1195-2778.831543924.899963-434.967377-16.103178-23.349348242.882431-37.833126-41.775177-119.16433743.505074...11.097609-11.073200-13.2535965.406798-17.156864-7.780338-24.389185-10.724975-5.4437702
1196-2698.895264819.713989-325.541809-12.013907-24.603376109.989647-37.048916-21.737972-2.6220026.201177...-13.437889-0.313441-10.38170119.76334425.998217-5.2598573.201698-32.150646-23.8191432
1197-2780.819824928.235535-437.094635-16.398756-21.832441247.764145-35.915752-42.458527-121.82815643.289146...16.934982-24.733269-19.3612796.819247-23.084692-11.6255701.362972-16.261547-4.3186792
1198-2794.181641948.835876-456.649323-16.779737-22.549208278.220184-31.817270-44.791748-148.21752950.467155...-2.029772-4.7046189.0330995.793400-10.758887-7.3317451.210876-14.464475-0.8578572
1199-2736.962158871.259827-377.366730-14.921387-28.683245177.661697-41.354248-23.649105-61.64075128.548815...-6.47954839.46415714.158841-25.68635929.01289739.76305416.16370017.22655111.3078342
\n", + "

1200 rows × 251 columns

\n", + "
" + ], + "text/plain": [ + " pca0 pca1 pca2 pca3 pca4 \\\n", + "0 255.425598 -1785.702271 925.004456 -647.859558 558.293701 \n", + "1 803.354980 -2022.260864 837.281555 433.116974 -408.251678 \n", + "2 2319.719727 -2092.848877 -420.375305 -251.272079 512.934448 \n", + "3 -1407.126343 -684.353210 968.245789 23.670799 32.504410 \n", + "4 -963.219238 -1100.291626 1195.094727 -167.957077 82.295097 \n", + "... ... ... ... ... ... \n", + "1195 -2778.831543 924.899963 -434.967377 -16.103178 -23.349348 \n", + "1196 -2698.895264 819.713989 -325.541809 -12.013907 -24.603376 \n", + "1197 -2780.819824 928.235535 -437.094635 -16.398756 -21.832441 \n", + "1198 -2794.181641 948.835876 -456.649323 -16.779737 -22.549208 \n", + "1199 -2736.962158 871.259827 -377.366730 -14.921387 -28.683245 \n", + "\n", + " pca5 pca6 pca7 pca8 pca9 ... \\\n", + "0 -352.141418 -539.285645 -457.659058 81.950119 -70.469582 ... \n", + "1 193.452789 582.161194 264.769287 -683.036194 174.103912 ... \n", + "2 1146.280273 -162.358948 -497.942871 -366.308472 47.767113 ... \n", + "3 -1071.493164 143.973633 79.708427 646.462952 -257.145782 ... \n", + "4 -1099.581055 -66.005585 140.388641 540.216675 -121.797462 ... \n", + "... ... ... ... ... ... ... \n", + "1195 242.882431 -37.833126 -41.775177 -119.164337 43.505074 ... \n", + "1196 109.989647 -37.048916 -21.737972 -2.622002 6.201177 ... \n", + "1197 247.764145 -35.915752 -42.458527 -121.828156 43.289146 ... \n", + "1198 278.220184 -31.817270 -44.791748 -148.217529 50.467155 ... \n", + "1199 177.661697 -41.354248 -23.649105 -61.640751 28.548815 ... 
\n", + "\n", + " pca241 pca242 pca243 pca244 pca245 pca246 \\\n", + "0 -14.266523 47.727501 24.730694 -25.180429 40.024025 16.892918 \n", + "1 -2.078821 35.566795 15.871997 -34.090492 -36.830696 -15.575199 \n", + "2 -57.140324 -55.582756 57.236565 90.583664 -1.161246 14.537497 \n", + "3 64.606071 -6.576953 -96.657280 3.912672 -26.863323 26.665403 \n", + "4 25.663364 -57.522728 5.548359 0.370564 27.228884 -36.064571 \n", + "... ... ... ... ... ... ... \n", + "1195 11.097609 -11.073200 -13.253596 5.406798 -17.156864 -7.780338 \n", + "1196 -13.437889 -0.313441 -10.381701 19.763344 25.998217 -5.259857 \n", + "1197 16.934982 -24.733269 -19.361279 6.819247 -23.084692 -11.625570 \n", + "1198 -2.029772 -4.704618 9.033099 5.793400 -10.758887 -7.331745 \n", + "1199 -6.479548 39.464157 14.158841 -25.686359 29.012897 39.763054 \n", + "\n", + " pca247 pca248 pca249 labels \n", + "0 53.906643 -17.681454 30.339476 0 \n", + "1 -51.617443 -81.089500 -49.171860 0 \n", + "2 42.300175 -20.864361 -2.166484 0 \n", + "3 46.656254 -2.380642 48.228252 0 \n", + "4 -23.228437 -37.614681 67.636894 0 \n", + "... ... ... ... ... 
\n", + "1195 -24.389185 -10.724975 -5.443770 2 \n", + "1196 3.201698 -32.150646 -23.819143 2 \n", + "1197 1.362972 -16.261547 -4.318679 2 \n", + "1198 1.210876 -14.464475 -0.857857 2 \n", + "1199 16.163700 17.226551 11.307834 2 \n", + "\n", + "[1200 rows x 251 columns]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data_red_labeled = data_red.dataframe\n", + "data_red_labeled['labels'] = labels\n", + "data_red_labeled" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "2dd84860-c4cd-4296-92ea-86a5b7933344", + "metadata": {}, + "outputs": [], + "source": [ + "X = data_red_labeled.drop(['labels'], axis=1)\n", + "Y = data_red_labeled['labels']" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "417e12ae-63b1-455b-ab06-b30ad4e2e8ac", + "metadata": {}, + "outputs": [], + "source": [ + "# splitting data into training and test partition\n", + "X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "8e82a915-c9c5-4652-a5af-006c2278a7aa", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\altmann\\Miniconda3\\envs\\poreClustering\\lib\\site-packages\\sklearn\\cluster\\_kmeans.py:870: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning\n", + " warnings.warn(\n", + "C:\\Users\\altmann\\Miniconda3\\envs\\poreClustering\\lib\\site-packages\\sklearn\\cluster\\_kmeans.py:1382: UserWarning: KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=4.\n", + " warnings.warn(\n" + ] + }, + { + "data": { + "text/html": [ + "
KMeans(n_clusters=3, random_state=42)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "KMeans(n_clusters=3, random_state=42)" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "k = 3\n", + "kmeans = KMeans(n_clusters=k, random_state=42)\n", + "kmeans.fit(X_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "25b5bcb0-0278-4d47-8dc0-8a9e13c1dd68", + "metadata": {}, + "outputs": [], + "source": [ + "labels_pred_kmeans = kmeans.predict(X_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "e3732e70-8db5-411c-935f-c9166ff6eaf2", + "metadata": {}, + "outputs": [], + "source": [ + "modelkmeans = KMeansClassifier(X_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "e69cc6d8-e7e6-4620-a0ee-9613d285b94d", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\altmann\\Miniconda3\\envs\\poreClustering\\lib\\site-packages\\sklearn\\cluster\\_kmeans.py:1382: UserWarning: KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. 
You can avoid it by setting the environment variable OMP_NUM_THREADS=4.\n", + " warnings.warn(\n" + ] + } + ], + "source": [ + "modelkmeans.train(k=3)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "c8737efd-2503-4461-9802-f361398c042d", + "metadata": {}, + "outputs": [], + "source": [ + "labels_pred_kmeans = modelkmeans.predict(X_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "05e292e9-41c0-4ae9-8ca5-e830f323d8e5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "360" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(labels_pred_kmeans)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "9dd850b2-1451-41c8-b174-bdefb91d2b02", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "360" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(X_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "470f14bd-d417-4c22-a7d9-77a79972a909", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "cm = confusion_matrix(y_test, labels_pred_kmeans)\n", + "\n", + "ConfusionMatrixDisplay(confusion_matrix=cm).plot();" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4599e942-4518-4c74-999f-8eadeb77d37b", + "metadata": {}, + "outputs": [], + "source": [ + "try: \n", + " os.makedirs('Images/Ti6Al4V_Analysis/results/results_kmeans/0')\n", + " os.makedirs('Images/Ti6Al4V_Analysis/results/results_kmeans/1')\n", + " os.makedirs('Images/Ti6Al4V_Analysis/results/results_kmeans/2')\n", + " # os.makedirs('pore_label/3')\n", + "except OSError:\n", + " pass\n", + " \n", + "for i, image in enumerate(images):\n", + " label = labels_pred_kmeans[i]\n", + " img = cv.resize(cv.imread(image), dsize=(100, 100), interpolation=cv.INTER_CUBIC)\n", + " cv.imwrite('Images/Ti6Al4V_Analysis/results/results_kmeans/{}/{}.jpg'.format(label, i), img)" + ] + }, + { + "cell_type": "markdown", + "id": "a5692d17-4ed8-4a77-882c-b3b9a401ad5d", + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, + "source": [ + "## DBSCAN" + ] + }, + { + "cell_type": "markdown", + "id": "67373261-03a8-4485-8b39-fb2303947c9f", + "metadata": { + "tags": [] + }, + "source": [ + "### Pixel Features" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d21315f6-b12d-4f0b-8d87-6e60cbf28ca3", + "metadata": {}, + "outputs": [], + "source": [ + "modeldbscan = DBSCANClassifier(data_red.scale(), n_neighbors=2, min_samples=2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2e73e9d6-abcc-4cf4-b3d3-193036afe15b", + "metadata": {}, + "outputs": [], + "source": [ + "modeldbscan.knee" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8a094ab5-639d-43b6-a70d-64d2a5634a63", + "metadata": {}, + "outputs": [], + "source": [ + "modeldbscan.n_clusters" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"72b633d8-83ee-47e7-84cc-8a1b70394714", + "metadata": {}, + "outputs": [], + "source": [ + "labels_pred_dbscan = modeldbscan.labels" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fa1f601b-f45b-42f7-80c3-364cff6808ba", + "metadata": {}, + "outputs": [], + "source": [ + "for i in range(-1, modeldbscan.n_clusters, 1):\n", + " try:\n", + " os.makedirs('Images/Ti6Al4V_Analysis/results/results_dbscan/{}'.format(i))\n", + " except OSError:\n", + " pass\n", + " \n", + "for i, image in enumerate(images):\n", + " label = labels_pred_dbscan[i]\n", + " img = cv.resize(cv.imread(image), dsize=(100, 100), interpolation=cv.INTER_CUBIC)\n", + " cv.imwrite('Images/Ti6Al4V_Analysis/results/results_dbscan/{}/{}.jpg'.format(label, i), img)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cef38634-8f5e-4e82-aa03-fe9437f27a96", + "metadata": {}, + "outputs": [], + "source": [ + "cm = confusion_matrix(results['labels'], labels_pred_dbscan+1)\n", + "\n", + "ConfusionMatrixDisplay(confusion_matrix=cm).plot();" + ] + }, + { + "cell_type": "markdown", + "id": "cf856277-6784-4a5f-bdcb-64199fc8d861", + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, + "source": [ + "### local Features" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "94f7c6a3-cdbb-4bd9-8c28-194e637ea7d5", + "metadata": {}, + "outputs": [], + "source": [ + "modeldbscan_lf = DBSCANClassifier(X)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "21654256-660d-4943-9fe3-d7a2d0560735", + "metadata": {}, + "outputs": [], + "source": [ + "modeldbscan_lf.knee" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "963f77c2-5b56-4895-9dcf-62ab15498c89", + "metadata": {}, + "outputs": [], + "source": [ + "modeldbscan_lf.n_clusters" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f06534f8-20b4-453a-b904-6f9525e752a1", + "metadata": {}, + "outputs": [], + "source": [ + 
"labels_pred_dbscan = modeldbscan_lf.labels" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "baa95e7e-29e0-4f42-999a-5afe93eb40ab", + "metadata": {}, + "outputs": [], + "source": [ + "cm = confusion_matrix(sampling['label'], labels_pred_dbscan+1)\n", + "\n", + "ConfusionMatrixDisplay(confusion_matrix=cm).plot();" + ] + }, + { + "cell_type": "markdown", + "id": "3d7f53de-6c7c-49e7-9a59-3214b6a22425", + "metadata": { + "tags": [] + }, + "source": [ + "# Supervised Models" + ] + }, + { + "cell_type": "markdown", + "id": "3c8e44a1-a205-449e-90ca-00660c9cdd83", + "metadata": { + "tags": [] + }, + "source": [ + "## Binary Image Features" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "id": "b206a135-e9fc-420a-be52-09eba68cad16", + "metadata": {}, + "outputs": [], + "source": [ + "# mixing data rows --> data shouldnt be sorted for splitting \n", + "data_sample = results.copy().sample(frac=1).reset_index(drop=True)\n", + "X = data_sample.drop(['labels'], axis=1)\n", + "Y = data_sample['labels']" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "7ee2b19d-a9f2-4880-855d-5d0a83f0d0d9", + "metadata": {}, + "outputs": [], + "source": [ + "# scaling / normalizing the features\n", + "\n", + "# copy the dataframe\n", + "df_norm = X.copy()\n", + "# apply min-max scaling\n", + "for column in df_norm.columns:\n", + " df_norm[column] = (df_norm[column] - df_norm[column].min()) / (df_norm[column].max() - df_norm[column].min())\n", + "\n", + "X = df_norm" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "id": "4f932be2-c645-4b02-a8b5-36194e6b8eed", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
pca0pca1pca2pca3pca4pca5pca6pca7pca8pca9...pca241pca242pca243pca244pca245pca246pca247pca248pca249labels
794-2790.063965941.363220-450.876312-16.881285-23.757185266.390656-35.812542-43.869370-139.33947850.013527...18.105705-4.2262392.247457-21.6065415.6720590.1130892.93867511.76077411.5568082
190-2674.676758787.409058-292.080841-12.459249-23.54597369.260490-40.746521-12.49001733.843327-2.004482...3.731455-0.126414-19.8279760.3328970.4644692.854188-4.628636-10.954101-17.0883392
10241825.270142933.7229001387.870728589.914185-3328.892822381.851166-1327.042969405.855927653.255737513.292725...158.74577328.83920332.63030678.67158546.21959320.28943351.24433943.788345-39.0640261
519-2783.756348931.952576-441.249786-16.820127-22.943647252.919617-36.649456-42.258301-126.11209146.502876...22.02484115.041027-9.137911-17.1081014.355286-3.861822-2.75220520.779800-5.2262222
707-2585.554199673.569702-177.155655-11.210746-24.361341-63.133881-39.08774614.608357139.730865-38.287655...50.294846-23.780457-8.237935-23.4474608.4936961.24148418.59352316.1757356.3432532
125-968.545288-1103.6451421213.158813-135.15843261.409969-1121.710205-16.381384171.112793562.359192-147.075012...13.5194307.0004457.26580045.118504-5.01408924.601515-31.84525131.63601749.2246280
\n", + "

6 rows × 251 columns

\n", + "
" + ], + "text/plain": [ + " pca0 pca1 pca2 pca3 pca4 \\\n", + "794 -2790.063965 941.363220 -450.876312 -16.881285 -23.757185 \n", + "190 -2674.676758 787.409058 -292.080841 -12.459249 -23.545973 \n", + "1024 1825.270142 933.722900 1387.870728 589.914185 -3328.892822 \n", + "519 -2783.756348 931.952576 -441.249786 -16.820127 -22.943647 \n", + "707 -2585.554199 673.569702 -177.155655 -11.210746 -24.361341 \n", + "125 -968.545288 -1103.645142 1213.158813 -135.158432 61.409969 \n", + "\n", + " pca5 pca6 pca7 pca8 pca9 ... \\\n", + "794 266.390656 -35.812542 -43.869370 -139.339478 50.013527 ... \n", + "190 69.260490 -40.746521 -12.490017 33.843327 -2.004482 ... \n", + "1024 381.851166 -1327.042969 405.855927 653.255737 513.292725 ... \n", + "519 252.919617 -36.649456 -42.258301 -126.112091 46.502876 ... \n", + "707 -63.133881 -39.087746 14.608357 139.730865 -38.287655 ... \n", + "125 -1121.710205 -16.381384 171.112793 562.359192 -147.075012 ... \n", + "\n", + " pca241 pca242 pca243 pca244 pca245 pca246 \\\n", + "794 18.105705 -4.226239 2.247457 -21.606541 5.672059 0.113089 \n", + "190 3.731455 -0.126414 -19.827976 0.332897 0.464469 2.854188 \n", + "1024 158.745773 28.839203 32.630306 78.671585 46.219593 20.289433 \n", + "519 22.024841 15.041027 -9.137911 -17.108101 4.355286 -3.861822 \n", + "707 50.294846 -23.780457 -8.237935 -23.447460 8.493696 1.241484 \n", + "125 13.519430 7.000445 7.265800 45.118504 -5.014089 24.601515 \n", + "\n", + " pca247 pca248 pca249 labels \n", + "794 2.938675 11.760774 11.556808 2 \n", + "190 -4.628636 -10.954101 -17.088339 2 \n", + "1024 51.244339 43.788345 -39.064026 1 \n", + "519 -2.752205 20.779800 -5.226222 2 \n", + "707 18.593523 16.175735 6.343253 2 \n", + "125 -31.845251 31.636017 49.224628 0 \n", + "\n", + "[6 rows x 251 columns]" + ] + }, + "execution_count": 59, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sampling = X.copy()\n", + "sampling['labels'] = Y\n", + "sampling.sample(6)" + ] + }, + { + 
"cell_type": "code", + "execution_count": 60, + "id": "8565c9ce-cfd2-455f-b978-9d68fcc5ecd6", + "metadata": {}, + "outputs": [], + "source": [ + "# splitting data into training and test partition\n", + "X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3)" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "id": "f4f79f8a-10f3-4ec1-a19f-2c5089f595f5", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "accuracies = []\n", + "rf_ = []\n", + "k = range(1,21)\n", + "for i in k:\n", + " rf = RandomForestClassifier(max_depth=i, n_estimators=15, max_features=100)\n", + " rf.fit(X_train, y_train)\n", + " y_pred_train = rf.predict(X_train)\n", + " y_pred_test = rf.predict(X_test)\n", + " accuracy_train = accuracy_score(y_train, y_pred_train)\n", + " accuracy_test = accuracy_score(y_test, y_pred_test)\n", + " \n", + " rf_.append(rf)\n", + " accuracies.append([accuracy_train, accuracy_test])\n", + " \n", + "fig = plt.figure(figsize=(8,6))\n", + "ax = fig.add_subplot(1,2,1)\n", + "ax.scatter(k, [elmnt[0] for elmnt in accuracies], c='r', label='Training Split')\n", + "ax.scatter(k, [elmnt[1] for elmnt in accuracies], c='b', label='Test Split')\n", + "ax.legend()\n", + "ax1 = fig.add_subplot(1,2,2)\n", + "ax1.plot(k, [abs(elmnt[0]-elmnt[1]) for elmnt in accuracies], 'bo--', label='Absolute Difference')\n", + "ax1.legend()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "id": "0dec508a-11be-493c-8c51-170703cd8c86", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy Test: 0.9527777777777777\n", + "Accuracy Train: 0.9976190476190476\n" + ] + } + ], + "source": [ + "rf = rf_[8]\n", + "y_pred_test = rf.predict(X_test)\n", + "accuracy_test = accuracy_score(y_test, y_pred_test)\n", + "print(\"Accuracy Test:\", accuracy_test)\n", + "\n", + "y_pred_train = rf.predict(X_train)\n", + "accuracy_train = accuracy_score(y_train, y_pred_train)\n", + "print('Accuracy Train:', accuracy_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "id": "1ef007a8-01c1-4b57-8c25-4cf7bf0e9003", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "##### Create the confusion matrix\n", + "cm = confusion_matrix(y_test, y_pred_test)\n", + "\n", + "ConfusionMatrixDisplay(confusion_matrix=cm).plot();" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "id": "e07fcafc-99c6-4687-9279-154db8f98069", + "metadata": {}, + "outputs": [], + "source": [ + "pickle.dump(rf, open('Models/Classifier_Model_Binary_RF'+'.pickle', 'wb'))" + ] + }, + { + "cell_type": "markdown", + "id": "2cc06173-54a1-40d1-954b-34c67dc414ff", + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, + "source": [ + "## Local Extracted Features" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51aea9b5-fed6-4155-bd61-e1c95ed564f7", + "metadata": {}, + "outputs": [], + "source": [ + "# mixing data rows --> data shouldnt be sorted for splitting \n", + "data_sample = local_features.copy().sample(frac=1).reset_index(drop=True)\n", + "X = data_sample.drop(['label', 'img', 'pore_number', 'image_index'], axis=1)\n", + "Y = data_sample['label']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5d15e796-5101-4d24-a8f1-18b214722997", + "metadata": {}, + "outputs": [], + "source": [ + "# scaling / normalizing the features\n", + "\n", + "# copy the dataframe\n", + "df_norm = X.copy()\n", + "# apply min-max scaling\n", + "for column in df_norm.columns:\n", + " df_norm[column] = (df_norm[column] - df_norm[column].min()) / (df_norm[column].max() - df_norm[column].min())\n", + "\n", + "X = df_norm" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6d5a8c04-a778-411a-8e8b-45171533ba7a", + "metadata": {}, + "outputs": [], + "source": [ + "sampling = X.copy()\n", + "sampling['label'] = Y\n", + "sampling.sample(6)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3a042f2c-25b3-487f-9528-ad5b35cf1980", + "metadata": {}, + "outputs": [], + "source": [ + "# splitting 
data into training and test partition\n", + "X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bce3a20d-e3de-4bf1-8e33-d8fea848218c", + "metadata": {}, + "outputs": [], + "source": [ + "accuracies = []\n", + "rf_ = []\n", + "k = range(1,21)\n", + "for i in k:\n", + " rf = RandomForestClassifier(max_depth=i, n_estimators=200, max_features=5)\n", + " rf.fit(X_train, y_train)\n", + " y_pred_train = rf.predict(X_train)\n", + " y_pred_test = rf.predict(X_test)\n", + " accuracy_train = accuracy_score(y_train, y_pred_train)\n", + " accuracy_test = accuracy_score(y_test, y_pred_test)\n", + " \n", + " rf_.append(rf)\n", + " accuracies.append([accuracy_train, accuracy_test])\n", + " \n", + "fig = plt.figure(figsize=(8,6))\n", + "ax = fig.add_subplot(1,2,1)\n", + "ax.scatter(k, [elmnt[0] for elmnt in accuracies], c='r', label='Training Split')\n", + "ax.scatter(k, [elmnt[1] for elmnt in accuracies], c='b', label='Test Split')\n", + "ax.legend()\n", + "ax1 = fig.add_subplot(1,2,2)\n", + "ax1.plot(k, [abs(elmnt[0]-elmnt[1]) for elmnt in accuracies], 'bo--', label='Absolute Difference')\n", + "ax1.legend()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77e4accf-be21-4bed-8362-64698a1827d6", + "metadata": {}, + "outputs": [], + "source": [ + "rf = rf_[5]\n", + "y_pred_test = rf.predict(X_test)\n", + "accuracy_test = accuracy_score(y_test, y_pred_test)\n", + "print(\"Accuracy Test:\", accuracy_test)\n", + "\n", + "y_pred_train = rf.predict(X_train)\n", + "accuracy_train = accuracy_score(y_train, y_pred_train)\n", + "print('Accuracy Train:', accuracy_train)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a07e6323-6ea7-4d75-9836-831eaf4eb132", + "metadata": {}, + "outputs": [], + "source": [ + "# Create the confusion matrix\n", + "cm = confusion_matrix(y_test, y_pred_test)\n", + "\n", + 
"ConfusionMatrixDisplay(confusion_matrix=cm).plot();" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5e26651e-700a-4ca2-b5fa-1ba764490ba2", + "metadata": {}, + "outputs": [], + "source": [ + "# plt.figure(figsize=(50,50))\n", + "corr = sampling.corr(numeric_only=True)\n", + "corrplot(corr, size_scale=500, marker=\"s\")\n", + "# plt.savefig('Images/Results/{}/KorrPlot.pdf'.format(dataset))\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "6619c79f-ea78-4da2-9f34-2ca51430f5c0", + "metadata": { + "tags": [] + }, + "source": [ + "# Results" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "3b3eb2d2-1e89-45b2-9eac-1808cc7e6529", + "metadata": {}, + "outputs": [], + "source": [ + "results = data_red.dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "6dab6db2-a569-448d-a0b6-bc9e7f63074a", + "metadata": {}, + "outputs": [], + "source": [ + "results['labels'] = labels" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1fb088d5-df6e-474c-a87d-661446599ef2", + "metadata": {}, + "outputs": [], + "source": [ + "results['labels'] = labels\n", + "results['predicted_kmeans'] = labels_pred_kmeans" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e8a86641-f5b7-463a-9ba8-568cbb9838e2", + "metadata": {}, + "outputs": [], + "source": [ + "results['predicted_dbscan'] = labels_pred_dbscan" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4fb212a8-fe1b-4145-bc50-e2050ba5604c", + "metadata": {}, + "outputs": [], + "source": [ + "results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cc45ab75-9fd8-4d2c-8bf3-1f8f5d7a9101", + "metadata": {}, + "outputs": [], + "source": [ + "sampling.to_csv('Images/Ti6Al4V_Analysis/Ti6Al4V_labeled_prepared_data.csv', sep=';', decimal=\",\", index=False)" + ] + }, + { + "cell_type": "markdown", + "id": "71a45f04-947a-4f4c-8790-744ac7b48470", + "metadata": { + 
"jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, + "source": [ + "# ANN Model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c33d6041-736a-442d-906b-c3960e20276e", + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.neural_network import MLPClassifier" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bc56b488-502e-4116-9b16-e0b8294f18ef", + "metadata": {}, + "outputs": [], + "source": [ + "clf = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(60,40,20), random_state=1, max_iter=10000000)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4b0ff09b-7d3b-4984-aa44-affcae58c91a", + "metadata": {}, + "outputs": [], + "source": [ + "clf.fit(X_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2adb423a-91c5-4492-bdef-51dfd889a218", + "metadata": {}, + "outputs": [], + "source": [ + "mlp_predicted = clf.predict(X_train)\n", + "# Create the confusion matrix\n", + "cm = confusion_matrix(y_train, mlp_predicted)\n", + "\n", + "ConfusionMatrixDisplay(confusion_matrix=cm).plot();" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b6177872-9b3b-424c-bf93-4ae2b995e996", + "metadata": {}, + "outputs": [], + "source": [ + "mlp_predicted = clf.predict(X_test)\n", + "# Create the confusion matrix\n", + "cm = confusion_matrix(y_test, mlp_predicted)\n", + "\n", + "ConfusionMatrixDisplay(confusion_matrix=cm).plot();" + ] + }, + { + "cell_type": "markdown", + "id": "25d0b1a4-4617-43a4-acd7-92367f983b3a", + "metadata": { + "tags": [] + }, + "source": [ + "# Final Model Test" + ] + }, + { + "cell_type": "code", + "execution_count": 181, + "id": "6572ffdd-569e-4fad-b394-295a926d3912", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "436 pores found!\n" + ] + } + ], + "source": [ + "# load pore images from different material to classify\n", + "images = [filepath for filepath in 
glob.iglob('Images/Ti6Al4V-Stichprobe_4/pores/405/*.jpg')]\n", + "\n", + "# read and resize pore binary image\n", + "img = [(cv.resize(cv.imread(image, cv.IMREAD_GRAYSCALE), dsize=(100, 100), interpolation=cv.INTER_CUBIC)) for image in images]\n", + "print('{} pores found!'.format(len(images)))" + ] + }, + { + "cell_type": "code", + "execution_count": 182, + "id": "c00e4cd0-9f6b-443e-b760-dc09558a3a96", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Varianz explained by PCA model with 249 Components is 96.833 %.\n" + ] + } + ], + "source": [ + "# push image data in pipeline\n", + "\n", + "# flattening images into one long vector\n", + "data_px = Preprocessing(img)\n", + "\n", + "# reducing dimensions with pretrained pca model\n", + "data_red = DimensionReductionPCA(data_px.dataframe, k=249, pca_model='Models/PCA_Model_Binary_RF')\n", + "data_red.pca_explain()" + ] + }, + { + "cell_type": "code", + "execution_count": 183, + "id": "d7a0a0fd-d9ad-4cf0-93d0-fedd20a99b0b", + "metadata": {}, + "outputs": [], + "source": [ + "# load classifier\n", + "classifier = pickle.load(open('Models/Classifier_Model_Binary_RF.pickle', 'rb'))" + ] + }, + { + "cell_type": "code", + "execution_count": 184, + "id": "1236efa7-4c9c-48c9-86ec-f3068a25c576", + "metadata": {}, + "outputs": [], + "source": [ + "# classify pores\n", + "y_pred = classifier.predict(data_red.dataframe)" + ] + }, + { + "cell_type": "code", + "execution_count": 185, + "id": "70392f14-d54d-41b3-991f-f323f3db994c", + "metadata": {}, + "outputs": [], + "source": [ + "# save pores depending by predicted label\n", + "try: \n", + " os.makedirs('Images/Ti6Al4V-Stichprobe_4/results/405/0')\n", + " os.makedirs('Images/Ti6Al4V-Stichprobe_4/results/405/1')\n", + " os.makedirs('Images/Ti6Al4V-Stichprobe_4/results/405/2')\n", + " \n", + "except OSError:\n", + " pass\n", + " \n", + "for i, image in enumerate(images):\n", + " label = y_pred[i]\n", + " img = 
cv.resize(cv.imread(image), dsize=(100, 100), interpolation=cv.INTER_CUBIC)\n", + " cv.imwrite('Images/Ti6Al4V-Stichprobe_4/results/405/{}/{}.jpg'.format(label, i), img)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.16" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/Models/Classifier_Model_Binary_RF.pickle b/Models/Classifier_Model_Binary_RF.pickle new file mode 100644 index 0000000..3b433a0 Binary files /dev/null and b/Models/Classifier_Model_Binary_RF.pickle differ diff --git a/Models/PCA_Model_Binary_RF.pickle b/Models/PCA_Model_Binary_RF.pickle new file mode 100644 index 0000000..ab775da Binary files /dev/null and b/Models/PCA_Model_Binary_RF.pickle differ diff --git a/analyzer/__init__.py b/analyzer/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/analyzer/__pycache__/__init__.cpython-310.pyc b/analyzer/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000..9af5224 Binary files /dev/null and b/analyzer/__pycache__/__init__.cpython-310.pyc differ diff --git a/analyzer/__pycache__/__init__.cpython-38.pyc b/analyzer/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..4a3b72b Binary files /dev/null and b/analyzer/__pycache__/__init__.cpython-38.pyc differ diff --git a/analyzer/__pycache__/__init__.cpython-39.pyc b/analyzer/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000..124dba7 Binary files /dev/null and b/analyzer/__pycache__/__init__.cpython-39.pyc differ diff --git a/analyzer/__pycache__/classificator.cpython-39.pyc b/analyzer/__pycache__/classificator.cpython-39.pyc new file mode 100644 index 0000000..ef20f4d Binary files 
class PoreSeperator():
    """Cut a fixed-size window around a single pore contour.

    The contour is drawn filled in white on a black canvas that has the shape
    of ``img``; a window of ``segmentsize`` times the image height/width,
    centred on the contour centroid, is then cropped out and stored in
    ``self.pore``.

    Args:
        contour: OpenCV contour of the pore.
        img: Image the contour was found in (only its shape/dtype are used
            for the canvas).
        segmentsize: Window edge length as a fraction of the image size.
    """

    def __init__(self, contour, img, segmentsize=0.1):
        self.image = img
        self.contour = contour
        self.segmentsize = segmentsize
        self.pore = self.__seperate()

    def __window_size(self):
        """Return the expected crop size in pixels as ``(height, width)``."""
        return (round(self.segmentsize * self.image.shape[0]),
                round(self.segmentsize * self.image.shape[1]))

    def __seperate(self):
        """Return the cropped, filled pore window (may be smaller at borders)."""
        height, width = self.__window_size()
        blanc = np.zeros_like(self.image)
        filled = cv.fillPoly(blanc, [self.contour], color=(255, 255, 255))

        M = cv.moments(self.contour)
        if M['m00'] == 0:
            # Degenerate contour without area: no centroid can be computed.
            # Return an empty window so that check_size() rejects it.
            return filled[0:0, 0:0]
        x = int(M['m10'] / M['m00'])
        y = int(M['m01'] / M['m00'])

        # top + height (instead of +/- round(height / 2)) gives exactly the
        # expected size for odd window sizes as well.
        top = y - height // 2
        left = x - width // 2

        # Clamp at 0: a negative start index would wrap around to the other
        # side of the image instead of truncating the window.
        return filled[max(top, 0):max(top + height, 0),
                      max(left, 0):max(left + width, 0)]

    def check_size(self):
        """Return True if the window was not truncated by the image border."""
        height, width = self.__window_size()
        return self.pore.shape[0] == height and self.pore.shape[1] == width

    def save(self, name):
        """Write the pore window to the image file ``name``."""
        cv.imwrite(name, self.pore)



class Preprocessing():
    """Flatten a list of pore images into one feature row per image.

    Args:
        pores: Sequence of numpy arrays (one per pore image). All images must
            contain the same number of pixels.

    Attributes set:
        pores_reshpd: each image as a float32 row vector of shape (1, n).
        data: each image as a flat float32 vector of length n.
        dataframe: DataFrame with one row per image and columns ``x0..x{n-1}``.
    """

    def __init__(self, pores):
        self.pores = pores
        # convert the image data into a vector
        self.pores_reshpd = [np.float32(pore.reshape((-1, 1))).T for pore in pores]
        self.data = [data[0] for data in self.pores_reshpd]
        self.dataframe = self.__create_dataframe()

    def __create_dataframe(self):
        """Stack the flat image vectors into a DataFrame (one row per image)."""
        if not self.data:
            return pd.DataFrame()
        matrix = np.vstack(self.data)
        columns = ['x{}'.format(i) for i in range(matrix.shape[1])]
        return pd.DataFrame(matrix, columns=columns)



class DimensionReductionPCA():
    """Project data onto principal components with a new or a stored PCA model.

    Args:
        data: 2-D feature table (rows are samples).
        pca_model: Path of a pickled PCA model *without* the ``.pickle``
            suffix. If None, a new model with ``k`` components is fitted.
        k: Number of components for a newly fitted model; also the component
            count reported by :meth:`pca_explain`.

    Attributes set:
        pca_model: the fitted/loaded PCA model.
        explanation: cumulative explained variance in percent.
        pca: transformed data.
        dataframe: transformed data with columns ``pca0..``.
    """

    def __init__(self, data, pca_model=None, k=3):
        self.data = data
        self.k = k
        if pca_model is None:
            self.pca_model, self.explanation = self.__create_pca_model()
        else:
            self.pca_model, self.explanation = self.__load_pca_model(pca_model)
        self.pca = self.__calc_pca()
        self.dataframe = self.__create_dataframe()

    def __create_pca_model(self):
        """Fit a k-component PCA and the cumulative variance of a full PCA."""
        n = min(self.data.shape[0], self.data.shape[1])

        # Full PCA is only used to obtain the cumulative explained variance.
        pca_ = PCA(n_components=n, random_state=2020)
        pca_.fit(self.data)
        explanation = np.cumsum(pca_.explained_variance_ratio_ * 100)

        pca_m = PCA(n_components=self.k, random_state=2020)
        pca_m.fit(self.data)
        return pca_m, explanation

    def __calc_pca(self):
        """Transform ``self.data`` with the PCA model."""
        return self.pca_model.transform(self.data)

    def __load_pca_model(self, name):
        """Load a pickled PCA model from ``<name>.pickle``."""
        # NOTE: pickle executes code on load - only open trusted model files.
        with open('{}.pickle'.format(name), 'rb') as model_file:
            pca_ = pickle.load(model_file)
        explanation = np.cumsum(pca_.explained_variance_ratio_ * 100)
        return pca_, explanation

    def __create_dataframe(self):
        """Wrap the transformed data in a DataFrame with ``pca<i>`` columns."""
        transformed = np.asarray(self.pca)
        columns = ['pca{}'.format(i) for i in range(transformed.shape[1])]
        return pd.DataFrame(transformed, columns=columns)

    def save_pca_model(self, name):
        """Pickle the PCA model to ``<name>.pickle``."""
        with open('{}.pickle'.format(name), 'wb') as model_file:
            pickle.dump(self.pca_model, model_file)
        print('PCA Model saved as {}.pickle'.format(name))

    def pca_explain(self):
        """Print the cumulative variance explained by the first k components."""
        # explanation[i] is the cumulative variance of i + 1 components, so k
        # components live at index k - 1. Clamp to the available range so that
        # k == number of components does not raise an IndexError.
        n_components = min(max(int(self.k), 1), len(self.explanation))
        print('Varianz explained by PCA model with {} Components is {:.3f} %.'.format(
            n_components, self.explanation[n_components - 1]))

    def scale(self):
        """Return a min-max scaled copy of ``self.dataframe``.

        Constant columns (max == min) are set to 0.0 instead of producing NaN
        through a division by zero.
        """
        df_norm = self.dataframe.copy()
        for column in df_norm.columns:
            col_min = df_norm[column].min()
            col_range = df_norm[column].max() - col_min
            if col_range == 0:
                df_norm[column] = 0.0
            else:
                df_norm[column] = (df_norm[column] - col_min) / col_range

        return df_norm



class DBSCANClassifier():
    """Cluster data with DBSCAN, estimating eps from a k-distance knee.

    Args:
        data: 2-D feature table (rows are samples).
        n_neighbors: Number of neighbours used for the k-distance curve.
        min_samples: ``min_samples`` passed to DBSCAN.
        knee: eps value to use; if None it is estimated from the data.

    Attributes set:
        knee: eps estimate (may be None if no knee was found).
        labels: cluster label per sample, -1 marks noise.
        n_clusters: number of clusters without the noise label.
        n_outliers / n_noise: number of samples labelled as noise.
    """

    def __init__(self, data, n_neighbors=2, min_samples = 3, knee=None):
        self.data = data
        self.n_neighbors = n_neighbors
        self.min_samples = min_samples
        self.knee = self.__calc_knee() if knee is None else knee
        self.n_clusters = None
        self.n_outliers = None
        self.labels = self.__dbscan()

    def __calc_knee(self):
        """Estimate eps as the knee of the sorted k-nearest-neighbour distances."""
        # finding best epsilon for DBSCAN: https://iopscience.iop.org/article/10.1088/1755-1315/31/1/012012/pdf
        # the number of neighbours should be roughly twice the number of features
        nbrs = NearestNeighbors(n_neighbors=self.n_neighbors, metric='euclidean').fit(self.data)
        neigh_dist, neigh_ind = nbrs.kneighbors(self.data)
        sort_neigh_dist = np.sort(neigh_dist, axis=0)

        k_dist = sort_neigh_dist[:, self.n_neighbors-1]
        x = list(range(len(k_dist)))

        kneedle = KneeLocator(x=x, y=k_dist, S=1.0, curve='concave', direction='increasing', online=True)
        return kneedle.knee_y

    def __dbscan(self):
        """Run DBSCAN and record cluster and noise counts."""
        # fall back to eps = 0.1 when no knee could be determined
        eps = self.knee if self.knee is not None else 0.1
        labels = DBSCAN(eps=eps, min_samples=self.min_samples).fit(self.data).labels_

        n_noise = int(np.count_nonzero(labels == -1))
        self.n_clusters = len(set(labels)) - (1 if n_noise else 0)
        self.n_outliers = n_noise
        self.n_noise = n_noise  # kept for callers that read the old attribute
        return labels



class KMeansClassifier():
    """Thin wrapper around scikit-learn KMeans for training data ``data``."""

    def __init__(self, data):
        self.data = data

    def find_knee(self, k=5):
        """Plot the KMeans inertia for 1..k-1 clusters (elbow plot)."""
        K = range(1, k)
        distortions = []

        for n_clusters in K:
            kmeans = KMeans(n_clusters=n_clusters, init='k-means++', n_init=10, max_iter=100, random_state=None)
            kmeans.fit(self.data)
            distortions.append(kmeans.inertia_)

        plt.figure(figsize=(16,8))
        plt.plot(K, distortions, 'bx-')
        plt.xlabel('k')
        plt.ylabel('Distortion')
        plt.title('Am Ellenbogen liegt die perfekte Anzahl an Clustern')
        plt.show()

    def train(self, k=3):
        """Fit KMeans with ``k`` clusters on ``self.data``; store it in ``self.model``."""
        kmeans = KMeans(n_clusters=k, init='k-means++', n_init=10, max_iter=100, random_state=None)
        kmeans.fit(self.data)
        self.model = kmeans

    def predict(self, data):
        """Return the cluster index of every sample in ``data``.

        Requires a prior call to :meth:`train`.
        """
        # Return the prediction for `data`, not the labels of the training
        # set (model.labels_), which have a different length for new data.
        return self.model.predict(data)

    def save_model(self, name):
        """Pickle the trained model to ``<name>.pickle``."""
        with open(name+'.pickle', 'wb') as model_file:
            pickle.dump(self.model, model_file)
        print('Model saved as {}.pickle!'.format(name))
###### Find all contours in the micrograph ######
def get_Contours(image, scale=1.79173, cropsize_microns=2000):
    """Binarise a micrograph, centre-crop it and return all contours.

    Args:
        image: Micrograph as a numpy array (colour or greyscale).
        scale: Factor multiplied with ``cropsize_microns`` to get the crop
            edge in pixels (presumably pixels per micron - confirm against
            the microscope calibration).
        cropsize_microns: Edge length of the square centre crop.

    Returns:
        Tuple ``(contours, hierarchy, img_binary, threshold)``:
        the contours found by OpenCV, one hierarchy row per contour, the
        cropped binary image (with a 5 px black border) and the Otsu
        threshold that was used.
    """
    ### convert to greyscale if it is a colour image ###
    try:
        image = cv.cvtColor(image, cv.COLOR_RGB2GRAY)
    except cv.error:
        # image is already single-channel - use it unchanged
        pass

    ### Gaussian blur and binarisation ###
    # a 5x5 kernel loses many small pores/defects
    # a 3x3 kernel detects more small pores/defects
    img_blur = cv.GaussianBlur(image, (5, 5), 0)
    threshold, img_binary = cv.threshold(img_blur, 0, 255, cv.THRESH_BINARY + cv.THRESH_OTSU)

    ### centre crop ###
    # cut an equally sized region of interest out of every image
    height, width = img_binary.shape[:2]

    w = round(cropsize_microns * scale)
    h = round(cropsize_microns * scale)

    # clamp at 0: for images smaller than the crop a negative origin would
    # wrap around and select the wrong region
    x = max(round(width / 2 - w / 2), 0)
    y = max(round(height / 2 - h / 2), 0)

    img_binary = img_binary[y:y+h, x:x+w]
    img_binary = cv.copyMakeBorder(img_binary, 5, 5, 5, 5, cv.BORDER_CONSTANT, None, value=0)

    ###### find contours and their hierarchy ######
    contours, hierarchy = cv.findContours(img_binary, cv.RETR_TREE, cv.CHAIN_APPROX_SIMPLE)

    ### hierarchy ###
    # hierarchy[i][0]: the index of the next contour of the same level
    # hierarchy[i][1]: the index of the previous contour of the same level
    # hierarchy[i][2]: the index of the first child
    # hierarchy[i][3]: the index of the parent

    if hierarchy is None:
        # no contour found: return an empty hierarchy instead of failing on None[0]
        return contours, np.empty((0, 4), dtype=np.int32), img_binary, threshold

    return contours, hierarchy[0], img_binary, threshold

#####################################################################################################################################################

###### Bounding boxes around all pores/defects ######
def get_BoundingBox(contours):
    """Return the bounding rectangle ``(x, y, w, h)`` of every contour.

    Each contour is first approximated by a polygon (epsilon = 1, closed) and
    the rectangle is computed from that polygon.
    """
    return [cv.boundingRect(cv.approxPolyDP(contour, 1, True)) for contour in contours]

#####################################################################################################################################################

###### Segment the pores by means of their bounding boxes ######
def segment_Contours(bounding_rect, binary_image):
    """Cut one region of interest per bounding box out of ``binary_image``.

    Args:
        bounding_rect: Iterable of ``(x, y, w, h)`` rectangles.
        binary_image: 2-D (or higher) numpy image to slice.

    Returns:
        List with one image slice per rectangle, in input order.
    """
    roi = []
    for x, y, w, h in bounding_rect:
        x, y, w, h = int(x), int(y), int(w), int(h)
        roi.append(binary_image[y:y + h, x:x + w])

    return roi
aus den non_parents_index löschen + + ### Kontur und Hierarchy der Probenkontur abspeichern ### + # specimen_contour[0] enthält die Kontur + # specimen_contour[1] enthält die hierarchy + # specimen_contour[2] enthält den realen Index + specimen_contour = [contours[specimen_contour_index], hierarchy[specimen_contour_index], specimen_contour_index] + +# ### inner Poren entfernen und Differenz bestimmen #### +# img = np.zeros_like(binary_image) +# particle_contours = [contours[index] for index in non_parents_index] +# img = cv.drawContours(img, particle_contours, -1, (255,255,255), -1) # alle Partikel (non_parents) in weiß einzeichnen +# img = cv.drawContours(img, specimen_contour[0], 0, (2555,255,255), -1) +# ## Konturen der äußeren Partikel finden ## +# outer_particles, _ = cv.findContours(img, cv.RETR_CCOMP, cv.CHAIN_APPROX_SIMPLE) +# outer_particles_shape = [contour.shape[0]*contour.shape[1] for contour in outer_particles] +# ## Indizes der äußeren Partikel bestimmen ## +# outer_particles_index = [non_parents_index[i] for i, shape in enumerate(non_parents_shape) if shape in outer_particles_shape] +# inner_particles_index = [non_parents_index[i] for i, shape in enumerate(non_parents_shape) if not shape in outer_particles_shape] + +# print('Anzahl an Partikeln: {}'.format(len(non_parents_shape))) +# print('Anzahl äußerer Partikel: {}'.format(len(outer_particles))) +# print('Anzahl innerer Partikel:{}'.format(len(inner_particles_index))) +# cv.imwrite('Images/outer_Particles.jpg', img) + + ### Poren finden ### + # müssen alle innerhalb der Probenkontur liegen + # müssen als Eltern-Kontur die Probenkontur haben + inner_pores_index = [] + + for i, contour in enumerate(hierarchy): + if contour[3] == specimen_contour_index: # Filtern ob die Pore als Elternkontur die Probenkontur hat + inner_pores_index.append(i) + + ### Ausgeben von Informationen über die Ergebnisse ### + # print('Probenkontur-Index: {}'.format(specimen_contour_index)) + # print('Anzahl an 
Partikelanhaftungen: {}'.format(len(non_parents_index))) + # print('Anzahl an innerer Poren: {}'.format(len(inner_pores_index))) + + ### Probenkontur und Anhaftungen zurückgeben ### + # Rückgabe der realen Indizes der anhaftenden Partikel + # Größen etc. der Anhaftungen können dann über den Index aus der Konturliste ausgelesen werden. + outer_particles_index = non_parents_index # Anhaftungen sind alle Konturen ohne Eltern und ohne die Probenkontur + return specimen_contour, outer_particles_index, inner_pores_index + +##################################################################################################################################################### + +###### Porenfeatures bestimmen und Porenzuschnitte speichern ###### +# Schliffbild maskieren, sodass alle äußeren Partikel weg fallen +def get_Pore_Features(binary_img, specimen_contour, circularity_threshold, save_pores, save_path, sizeFilter): + ### idealisiertes Schliffbild erzeugen ### + # leeres Bild [None] --> Partikel in Schwarz [0] --> Poren in weiß [255] ==> äußere Partikel verschwinden + # Poren in äußeren Partikeln könnten im idealisierten Bild auftreten + # mask = specimen_contour # Probenkontur als Maske --> äußere Partikel fallen weg + # blanc = np.zeros_like(binary_img) # leeres schwarzes Bild erzeugen + # color = [255,255,255] # Farbe (Weiß) für zu maskierenden Bereich + # cv.fillPoly(blanc, [mask], color) # zu maskierenden Bereich einfärben --> alles innerhalb der Maske bzw. 
weiße bleibt bestehen + # result = cv.bitwise_and(binary_img, blanc) # Binärbild maskieren, nur Probe mit inneren Poren bleibt bestehen + + ### Konturen suchen ### + # kein bluring oder binarisieren notwendig, wurde bei binary_image bereits angewendet + # contours, hierarchy = cv.findContours(result, cv.RETR_TREE, cv.CHAIN_APPROX_SIMPLE) + contours, hierarchy = cv.findContours(binary_img, cv.RETR_TREE, cv.CHAIN_APPROX_SIMPLE) + + ### Poren und innere Partikel separieren ### + # Partikel haben nicht die Probe als Elternkontur sondern die Poren + # Probenkontur finden + contour_area = [cv.contourArea(contour) for contour in contours] + specimen_index = [i for i in range(len(contours)) if contour_area[i] == max(contour_area)][0] + pores_index = [i for i, hierarch in enumerate(hierarchy[0]) if hierarch[3] == specimen_index] + # Konturflächenfilter + if sizeFilter != None: + for index in pores_index: + if contour_area[index] <= 5: + pores_index.remove(index) + # (pores_index.remove(index) for index in pores_index if contour_area[index] < sizeFilter) + + ### Konturen der Poren als Liste speichern ### + pores = [contours[index] for index in pores_index] + + ## Anzahl und Größe der inneren Partikel für jede Pore bestimmen + # Partikel mit Poren finden + particle_pores = [index for index in pores_index if hierarchy[0, index, 2] >= 0] + # Partikel aus allen Konturen selektieren + particles = [index for index in range(len(contours)) if hierarchy[0, index, 3] != specimen_index and hierarchy[0, index, 3] > 0] + # Die Elternkontur für jedes Partikel bestimmen + particle_parents = [hierarchy[0, particle, 3] for particle in particles] + # Anzahl an Partikeln in jeder Partikelpore bestimmen + particles_in_pores = [particle_parents.count(index) for index in particle_pores] + + ## Für jede Pore die Anzahl an Partikeln speichern + pores_particles = [None] * len(pores_index) + for i, index in enumerate(pores_index): + if index in particle_pores: + j = particle_pores.index(index) + 
pores_particles[i] = particles_in_pores[j] + else: + pores_particles[i] = 0 + + ## für jedes Partikel die Größe bestimmen + particle_size = [cv.contourArea(contours[particle]) for particle in particles] + ## Für jede Pore die Größen der enthaltenen Partikel als Subliste speichern + particle_sizes_pores = [None] * len(pores_index) + for i, index in enumerate(pores_index): + if index in particle_pores: + sizes = [particle_size[i] for i, parent in enumerate(particle_parents) if parent == index] + particle_sizes_pores[i] = sizes + else: + particle_sizes_pores[i] = [0] + ## Statistik zu den Partikelgrößen + max_particle_size_pore = [max(element) if element[0] != 0 else 0 for element in particle_sizes_pores] + min_particle_size_pore = [min(element) if element[0] != 0 else 0 for element in particle_sizes_pores] + mean_particle_size_pore = [sum(element)/len(element) if element[0] != 0 else 0 for element in particle_sizes_pores] + median_particle_size_pore = [np.median(element) if element[0] != 0 else 0 for element in particle_sizes_pores] + std_particle_size_pore = [np.std(element) if element[0] != 0 else 0 for element in particle_sizes_pores] + + ## Porengrößen + pore_areas = [contour_area[index] for index in pores_index] + ## Konturlängen + pore_perimeters = [cv.arcLength(contours[index], True) for index in pores_index] + ## Approximierte Kontur + + ## Konvexitätsfehler + pore_convex_hulls_ret = [cv.convexHull(contours[index], returnPoints=False) for index in pores_index] + pore_convex_hulls = [cv.convexHull(contours[index]) for index in pores_index] + convexity_defects = [cv.convexityDefects(contours[index], pore_convex_hulls_ret[i]) for i, index in enumerate(pores_index)] + pore_convexity_defects = [] + for convexity_defect in convexity_defects: + try: + pore_convexity_defects.append(len(convexity_defect)) + except: + pore_convexity_defects.append(0) + + convexity_defect_sizes = [] + for convexity_defect in convexity_defects: + try: + 
convexity_defect_sizes.append([defect[0][3] for defect in convexity_defect]) + except: + convexity_defect_sizes.append(0) + + max_convexity_defect = [] + min_convexity_defect = [] + mean_convexity_defect = [] + median_convexity_defect = [] + std_convexity_defect = [] + for convexity_defect in convexity_defect_sizes: + try: + max_convexity_defect.append(max(convexity_defect)) + min_convexity_defect.append(min(convexity_defect)) + mean_convexity_defect.append(sum(convexity_defect)/len(convexity_defect)) + median_convexity_defect.append(np.median(convexity_defect)) + std_convexity_defect.append(np.std(convexity_defect)) + except: + max_convexity_defect.append(0) + min_convexity_defect.append(0) + mean_convexity_defect.append(0) + median_convexity_defect.append(0) + std_convexity_defect.append(0) + + + ## Zirkularität + pore_circularities = [4*math.pi*area/(pore_perimeters[i])**2 for i, area in enumerate(pore_areas)] + + ## Solidität + pore_solidities = [pore_areas[i]/cv.contourArea(convex_hull) for i, convex_hull in enumerate(pore_convex_hulls)] + + ## Bounding Box + pore_bounding = [cv.boundingRect(contours[index]) for index in pores_index] + width_rect = [bounding[2] for bounding in pore_bounding] + height_rect = [bounding[3] for bounding in pore_bounding] + density_bounding_rect = [contour_area[index]/(width_rect[i]*height_rect[i]) for i, index in enumerate(pores_index)] + + ## Bounding Box mit minimaler Fläche + pore_min_area_bounding = [cv.minAreaRect(contours[index]) for index in pores_index] + width_min_rect = [bounding[1][0] for bounding in pore_min_area_bounding] + height_min_rect = [bounding[1][1] for bounding in pore_min_area_bounding] + density_min_rect = [contour_area[index]/(width_min_rect[i]*height_min_rect[i]) for i, index in enumerate(pores_index)] + + ## Porenrotation + pore_rotations = [] + for i, element in enumerate(pore_min_area_bounding): + # element.width < element.heigth + if element[1][0] < element[1][1]: + 
pore_rotations.append(abs(pore_min_area_bounding[i][2]) + 90) + else: + pore_rotations.append(abs(pore_min_area_bounding[i][2])) + + # Winkel bei Verdrehung der Probe korrigieren + pore_rotations_corr = [angle - cv.minAreaRect(contours[specimen_index])[2] for angle in pore_rotations] + + ## Minimaler einschließender Kreis + min_circles = [cv.minEnclosingCircle(contours[index]) for index in pores_index] + radian_min_circle = [int(min_circle[1]) for min_circle in min_circles] + density_min_circle = [contour_area[index]/(math.pi*radian_min_circle[i]**2) for i, index in enumerate(pores_index)] + + particle_density = np.divide(pores_particles, [get_ContourArea_Microns(area) for area in pore_areas]) + defect_density = np.divide(pore_convexity_defects, [get_ContourArea_Microns(area) for area in pore_areas]) + + ## Mittelpunktkoordinaten auslesen (von min_area_rect) ### + # print(pore_bounding[0]) + x_coordinates = [bounding[0] for bounding in pore_bounding] + y_coordinates = [bounding[1] for bounding in pore_bounding] + + features = {'Pore_Index': pores_index, + 'x_Coordinate': x_coordinates, + 'y_Coordinate': y_coordinates, + 'No_Particles': pores_particles, + 'Particle_Density': particle_density, + 'Max_Particle': [get_ContourArea_Microns(element) for element in max_particle_size_pore] , + 'Min_Particle': [get_ContourArea_Microns(element) for element in min_particle_size_pore], + 'Mean_Particle': [get_ContourArea_Microns(element) for element in mean_particle_size_pore], + 'Median_Particle': [get_ContourArea_Microns(element) for element in median_particle_size_pore], + 'STD_Particle': [get_ContourArea_Microns(element) for element in std_particle_size_pore], + 'Area': [get_ContourArea_Microns(element) for element in pore_areas], + 'Area_PX': pore_areas, + 'Perimeter': [get_Length_Mircons(element) for element in pore_perimeters], + 'Circularity': pore_circularities, + 'Solidity': pore_solidities, + 'Angle': pore_rotations, + 'Angle_corr': pore_rotations_corr, + # 
'No_Convexity_Defects': pore_convexity_defects, + 'Defect_Density': defect_density, + 'Max_Convexity_Defect': [get_Length_Mircons(element) for element in max_convexity_defect], + 'Min_Convexity_Defect': [get_Length_Mircons(element) for element in min_convexity_defect], + 'Mean_Convexity_Defect': [get_Length_Mircons(element) for element in mean_convexity_defect], + 'Median_Convexity_Defect': [get_Length_Mircons(element) for element in median_convexity_defect], + 'STD_Convexity_Defect': [get_Length_Mircons(element) for element in std_convexity_defect], + 'Width_Rect': [get_Length_Mircons(element) for element in width_rect], + 'Height_Rect': [get_Length_Mircons(element) for element in height_rect], + 'Width_Min_Rect': [get_Length_Mircons(element) for element in width_min_rect], + 'Height_Min_Rect': [get_Length_Mircons(element) for element in height_min_rect], + 'Density_Rect': density_bounding_rect, + 'Density_min_Rect': density_min_rect, + 'Radius_Circle': [get_Length_Mircons(element) for element in radian_min_circle], + 'Density_min_Circle': density_min_circle, + } + + ## Zuschnitt jeder Pore in Ordner speichern + if save_pores == True: + rois = segment_Contours(bounding_rect=pore_bounding, binary_image=binary_img) + for i, roi in enumerate(rois): + cv.imwrite(save_path+'/'+str(pores_index[i])+'.jpg', roi) + + + return features, binary_img, pores + +##################################################################################################################################################### + +###### Kontur in leeres Bild zeichnen ###### +# Zum Überprüfen, wie die entsprechende Kontur aussieht +def plot_Contour(contours, image, contour_index, bounding_rect, save, crop, name): + ### leeres Bild erzeugen mit der Größe des Eingangsbildes ### + empty_segment = np.zeros_like(image) + + # gewünschte Konturen in einer Konturenliste speichern + # anhand der contour_index Angabe + cnt = [] + [cnt.append(contours[contour]) for i, contour in enumerate(contour_index)] + + 
# alle Konturen in der erstellten Liste plotten + segment = cv.drawContours(empty_segment, cnt, -1, (255,255,255), -1) + + ### Bild auf relevanten Bereich zuschneiden ### + # nur wenn crop=True, sonst wird Kotur in das Ursprungsbild geplottet + if crop == True: + segment = segment[int(bounding_rect[contour_index][1]): int(bounding_rect[contour_index][1]+int(bounding_rect[contour_index][3])), \ + int(bounding_rect[contour_index][0]): int(bounding_rect[contour_index][0])+int(bounding_rect[contour_index][2])] + + if save == True: + cv.imwrite('Images/segmented_contour_{}.jpg'.format(name), segment) + + fig = plt.figure(figsize=(8,8)) + plt.imshow(segment, cmap='gray') + plt.axis('off') + plt.title('Segmentierte Kontur {}'.format(name)) + plt.show() + +##################################################################################################################################################### + +####### Konturfläche als Mikrometer ausgeben lassen ###### +def get_ContourArea_Microns(area): + one_micron = 1.79173 # Umrechnungsfaktor PX in Mikrometer --> mithilfe der BA Bildanalyse bestimmt + one_sq_micron = one_micron**2 # Umrechnung in Quadratmikrometer + + area_contour_microns = area/one_sq_micron # Umrechnun der Flöche in Quadratmikrometer + + return area_contour_microns + +##################################################################################################################################################### + +###### Längenmaße in Mikrometer umrechnen ###### + +def get_Length_Mircons(data): + one_micron = 1.79173 + data_microns = one_micron * data + + return data_microns + +##################################################################################################################################################### + +###### relative Dichte berechnen ###### +# Berechnung der Dichte aus der Fläche der Probenkontur und der der inneren Poren +# Partikel innerhalb von Poren haben so keine Auswirkung auf die Dichte +# zweiten Dichtewert mit 
# inneren Partikeln bestimmen (ggf. sinnvoll wenn Proben gehipt werden)
def get_relative_Density(specimen_contour, inner_pores):
    """Return the relative density in percent of the fixed analysis window.

    Args:
        specimen_contour: unused; the reference area is the fixed square window of
            ``round(2000 * 1.79173)`` pixels edge length.
        inner_pores: iterable of pore contours whose areas are subtracted.
    """
    specimen_area = round(2000 * 1.79173) ** 2
    pores_area = sum(cv.contourArea(pore) for pore in inner_pores)

    return (specimen_area - pores_area) / specimen_area * 100

#####################################################################################################################################################

###### Split the specimen into core and border region ######
# The goal is to create two binary images of the original size.
# In one of them the border and in the other one the core is white, with black pores.
# The binary images can afterwards be analysed by the same functions as the original image.

### I N P U T S ###
# contours: all contours found in the original image
# contours_index: indices of all pores
# specimen_contour: the contour of the specimen
# binary_image: the input image as binary image, used to obtain the same image size
# relative_core_size: size of the core region relative to the bounding box of the specimen
# relative_y_offset: relative extension of the core region in y direction
def get_Core_Border(contours, contours_index, specimen_contour, binary_image, relative_core_size, relative_y_offset):
    """Return ``(core_img, border_img)``, the core and border masks of the specimen."""
    # The centroid of the specimen is the anchor of the centred core rectangle.
    moments = cv.moments(specimen_contour)
    x_center = int(moments['m10']/moments['m00'])
    y_center = int(moments['m01']/moments['m00'])

    contours_des = [contours[index] for index in contours_index]

    core_img = get_Core(specimen_contour, relative_core_size, relative_y_offset, binary_image, x_center, y_center, contours_des)
    border_img = get_Border(specimen_contour, relative_core_size, relative_y_offset, binary_image, x_center, y_center, contours_des)

    return core_img, border_img


def _core_rectangle(specimen_contour, relative_core_size, relative_y_offset, x_center, y_center):
    """Return the top-left and bottom-right corner of the centred core rectangle."""
    _, _, specimen_width, specimen_height = cv.boundingRect(specimen_contour)
    core_width = relative_core_size * specimen_width
    core_height = relative_core_size * specimen_height
    core_height = core_height + relative_y_offset * core_height
    top_left = (int(x_center - core_width / 2), int(y_center - core_height / 2))
    bottom_right = (int(x_center + core_width / 2), int(y_center + core_height / 2))
    return top_left, bottom_right


def get_Core(specimen_contour, relative_core_size, relative_y_offset, binary_image, x_center, y_center, contours_des):
    """Return an image with the core rectangle in white and the given pore contours in black."""
    top_left, bottom_right = _core_rectangle(specimen_contour, relative_core_size, relative_y_offset,
                                             x_center, y_center)
    core_img = np.zeros_like(binary_image)
    core_img = cv.rectangle(core_img, top_left, bottom_right, (255, 255, 255), -1)
    core_img = cv.drawContours(core_img, contours_des, -1, (0, 0, 0), -1)

    return core_img

def get_Border(specimen_contour, relative_core_size, relative_y_offset, binary_image, x_center, y_center, contours_des):
    """Return an image with the specimen in white, minus the pores and the core rectangle."""
    top_left, bottom_right = _core_rectangle(specimen_contour, relative_core_size, relative_y_offset,
                                             x_center, y_center)
    border_img = np.zeros_like(binary_image)
    border_img = cv.drawContours(border_img, [specimen_contour], 0, (255, 255, 255), -1)
    border_img = cv.drawContours(border_img, contours_des, -1, (0, 0, 0), -1)
    border_img = cv.rectangle(border_img, top_left, bottom_right, (0, 0, 0), -1)

    return border_img

#####################################################################################################################################################

###### Determine sizes ######
# Find outliers regarding the pore size
# Examine the influence of outliers on density/porosity
def get_Sizes(contour):
    """Return the area of ``contour`` converted with ``get_ContourArea_Microns``."""
    return get_ContourArea_Microns(cv.contourArea(contour))

#####################################################################################################################################################

###### Centre (distance) and centre of mass (distance) ######
def get_Positions(x_coordinates, y_coordinates, contour_areas):
    """Return the centre and the area-weighted centre of the pores and the distances to both.

    Args:
        x_coordinates: x position of every pore.
        y_coordinates: y position of every pore.
        contour_areas: area of every pore, used as weight for the centre of mass.

    Returns:
        Dict with ``'center'`` and ``'center_of_mass'`` as ``(x, y)`` tuples and
        ``'center_distances'`` and ``'center_of_mass_distances'`` as tuples of the per-pore
        absolute x and y distances.

    Raises:
        ZeroDivisionError: if no coordinates are given or the areas sum up to zero.
    """
    x_mean = sum(x_coordinates) / len(x_coordinates)
    y_mean = sum(y_coordinates) / len(y_coordinates)

    total_area = sum(contour_areas)
    x_gravity = sum(x * area for x, area in zip(x_coordinates, contour_areas)) / total_area
    y_gravity = sum(y * area for y, area in zip(y_coordinates, contour_areas)) / total_area

    # The relative shift between both centres and the straight-line distances were computed here
    # before but never returned; the shift divided by the mean and crashed for a mean of 0.
    x_distances = [abs(x_mean - coordinate) for coordinate in x_coordinates]
    y_distances = [abs(y_mean - coordinate) for coordinate in y_coordinates]
    x_distances_gravity = [abs(x_gravity - coordinate) for coordinate in x_coordinates]
    y_distances_gravity = [abs(y_gravity - coordinate) for coordinate in y_coordinates]

    positions = {'center': (x_mean, y_mean),
                 'center_of_mass': (x_gravity, y_gravity),
                 'center_distances': (x_distances, y_distances),
                 'center_of_mass_distances': (x_distances_gravity, y_distances_gravity)}

    return positions

#####################################################################################################################################################

###### Compute statistical values of a list ######
def get_Statistics(data):
    """Return descriptive statistics of ``data`` as a dict.

    Keys: ``NoElements``, ``Unique_Elements``, ``Maximum``, ``Minimum``, ``Average``,
    ``Median``, ``STD``, ``Varianz`` (population variance), ``Skewness``, ``Q1`` and ``Q3``.
    """
    no_elements = len(data)
    average = sum(data) / no_elements
    varianz = sum((element - average) ** 2 for element in data) / no_elements
    q1, q3 = np.percentile(data, [25, 75])

    statistics = {'NoElements': no_elements,
                  'Unique_Elements': len(np.unique(data)),
                  'Maximum': max(data),
                  'Minimum': min(data),
                  'Average': average,
                  'Median': np.median(data),
                  'STD': np.std(data),
                  'Varianz': varianz,
                  'Skewness': skew(data),
                  # 'Z_Score': z_score,
                  'Q1': q1,
                  'Q3': q3}

    return statistics

#####################################################################################################################################################

###### Derive size-weighted statistical values ######
# def get_Weighted_Statistics():

#####################################################################################################################################################
###### Detect outliers ######
def get_Outliers(data):
    """Count the outliers in ``data`` according to three criteria.

    Returns:
        Dict with the number of outliers per criterion:
        ``'IQ'``  - values outside ``[Q1 - 1.5 * IQR, Q3 + 1.5 * IQR]``,
        ``'STD'`` - values more than three standard deviations away from the mean,
        ``'Z'``   - values whose z-score is above 3 or below -3.
    """
    ### Interquartile range ###
    q1, q3 = np.percentile(data, [25, 75])
    q_distance = q3 - q1
    iq_high = q3 + 1.5 * q_distance
    iq_low = q1 - 1.5 * q_distance
    iq_outliers = [element for element in data if element > iq_high or element < iq_low]

    ### Standard deviation ###
    # 99.7 % of normally distributed points lie within three standard deviations.
    std = np.std(data)
    mean = sum(data) / len(data)
    std_high = mean + 3 * std
    std_low = mean - 3 * std
    std_outliers = [element for element in data if element > std_high or element < std_low]

    ### z-score ###
    # The threshold is applied to the z-scores; before, the raw values were compared with +-3.
    # A standard deviation of 0 means all values are equal, i.e. there is no outlier.
    z_scores = [(element - mean) / std for element in data] if std > 0 else []
    z_outliers = [score for score in z_scores if score > 3 or score < -3]

    outliers = {'IQ': len(iq_outliers),
                'STD': len(std_outliers),
                'Z': len(z_outliers)}

    return outliers

#####################################################################################################################################################

###### Histograms around the specimen ######
# Create histograms from 0 to 180 degrees, possibly only 0 and 90 degrees
# Determine the statistical key figures
# This way information about the locality of the pores can be gained

# Zeilen- und Spaltenweise die Anzahl an schwarzen Pixeln bestimmen --> Welche Zeile/ Spalte hat welchen Schwarzanteil, ggf.
# relativ umsetzbar
# Derive statistical values from the resulting vectors
def get_Position_Histograms(binary_image):
    """Count the pixels with value 255 in every row and every column of a binary image.

    Args:
        binary_image: 2-D numpy array.

    Returns:
        Tuple ``(vertical_histogram, horizontal_histogram)`` of plain lists:
        ``vertical_histogram[r]`` is the number of pixels equal to 255 in row ``r`` and
        ``horizontal_histogram[c]`` the number in column ``c``.
    """
    # NOTE(review): the accompanying comments talk about the share of black pixels, the code
    # counts pixels equal to 255 - confirm which one is intended.
    white = binary_image == 255
    vertical_histogram = np.sum(white, axis=1).tolist()
    horizontal_histogram = np.sum(white, axis=0).tolist()

    return vertical_histogram, horizontal_histogram

#####################################################################################################################################################

###### Create a representative pore ######
# Cut the inner pores out of the original binary image
# Determine the largest crop and create an image of [None]s
# Invert the segmented pores if necessary
# Draw the pores in white (255) into the None image
# Pass the image to the overlay function
# Overlay all pore images
## ! https://stackoverflow.com/questions/17291455/how-to-get-an-average-picture-from-100-pictures-using-pil ! ##

# Primär sinnvoll für alle großen Poren (z.B.
# die Ausreißer die wesentlich Größer sind)
def _overlay_patches(patches, weights, threshold):
    """Centre all patches on a white canvas, average them with the weights and binarise.

    Every patch is pasted into the middle of a canvas of value 255 that has the size of the
    largest patch. The canvases are summed up, each multiplied by ``weight / sum(weights)``, and
    pixels above ``threshold`` are set to 255, all others to 0.
    """
    height = max(patch.shape[0] for patch in patches)
    width = max(patch.shape[1] for patch in patches)
    total_weight = sum(weights)

    avr_img = np.zeros([height, width])
    for patch, weight in zip(patches, weights):
        rows, columns = patch.shape[0], patch.shape[1]
        # The paste starts exactly at the number of missing rows/columns. The former start at
        # `missing - 1` shifted every patch by one pixel and wrapped around to the last
        # row/column (index -1) for patches as large as the canvas.
        top = (height - rows) // 2
        left = (width - columns) // 2
        scaled = np.full((height, width), 255.0)
        scaled[top:top + rows, left:left + columns] = patch
        avr_img = avr_img + scaled * weight / total_weight

    # Binary threshold: 255 where the mean intensity exceeds the threshold, otherwise 0.
    return np.where(avr_img > threshold, 255.0, 0.0)


def _show_average_pore(avr_img, title):
    """Display the averaged pore image without axes."""
    plt.imshow(avr_img, cmap='gray')
    plt.axis('off')
    plt.title(title)
    plt.show()


def get_Average_Pore(segmented_Contours, contour_Index, threshold, plot):
    """Overlay the selected pore crops to one representative, binarised pore image.

    Args:
        segmented_Contours: list of 2-D image crops, one per contour.
        contour_Index: indices of the crops to overlay.
        threshold: mean intensity above which a pixel of the result becomes 255.
        plot: if truthy, the result is shown with matplotlib.

    Returns:
        Float array of the size of the largest selected crop with the values 0 and 255.
    """
    contours_des = [segmented_Contours[index] for index in contour_Index]
    avr_img = _overlay_patches(contours_des, [1] * len(contours_des), threshold)

    if plot:
        _show_average_pore(avr_img, 'Average Pore')

    return avr_img

#####################################################################################################################################################

###### Create a representative pore, weighting every pore by its size in the overlay ######
def get_Average_Pore_weighted(contours, segmented_Contours, contour_Index, threshold, plot):
    """Overlay pore crops to one representative pore image, weighted by the contour areas.

    Args:
        contours: contours, indexable like ``segmented_Contours``; their areas are the weights.
        segmented_Contours: list of 2-D image crops, one per contour.
        contour_Index: indices of the pores to overlay, or ``-1`` to use all of them.
        threshold: weighted mean intensity above which a pixel of the result becomes 255.
        plot: if truthy, the result is shown with matplotlib.

    Returns:
        Float array of the size of the largest selected crop with the values 0 and 255.
    """
    # Only a scalar -1 selects everything; testing the type first keeps the comparison
    # unambiguous for index arrays.
    use_all = isinstance(contour_Index, (int, np.integer)) and contour_Index == -1
    if use_all:
        contours_des = segmented_Contours
        contours_area = [cv.contourArea(contour) for contour in contours]
    else:
        contours_des = [segmented_Contours[index] for index in contour_Index]
        contours_area = [cv.contourArea(contours[index]) for index in contour_Index]

    avr_img = _overlay_patches(contours_des, contours_area, threshold)

    if plot:
        _show_average_pore(avr_img, 'Average Pore weighted by Poresize')

    return avr_img

+##################################################################################################################################################### + +###### lokale Dichte bestimmen ###### +# Zerlegung des Bildes in Quadrate +# Konturen in Quadrat bestimmen +# Anzahl an Poren, Form, Typ und Größe bestimmen +# relative Dichte im Quadrat bestimmen + +##################################################################################################################################################### \ No newline at end of file diff --git a/analyzer/features.py b/analyzer/features.py new file mode 100644 index 0000000..e5321c9 --- /dev/null +++ b/analyzer/features.py @@ -0,0 +1,440 @@ +######################################################## F E A T U R E E X T R A C T I O N ######################################################## +###### ###### +###### A U T H O R I N F O R M A T I O N ###### +###### Mika Leon Altmann ###### +###### 31st of March, 2023 ###### +###### Leibniz-Institute for Materials Science, Bremen, Germany ###### +###### ###### +###### D E S C R I P T I O N ###### +###### Feature extraction of micrographs for powder bed fusion with laser beam of metals. 
######
#####################################################################################################################################################

import cv2 as cv
import numpy as np
from numpy.linalg import norm
from scipy.stats import skew
import pandas as pd
from heatmap import heatmap, corrplot

from sklearn.neighbors import NearestNeighbors
from kneed import KneeLocator
from sklearn.cluster import DBSCAN
from sklearn import metrics
from sklearn.decomposition import PCA
from mpl_toolkits import mplot3d
from mpl_toolkits.mplot3d import axes3d
from sklearn.preprocessing import QuantileTransformer

import matplotlib.pyplot as plt
import os


class CustomException(Exception):
    """Raised when a micrograph cannot be analysed (e.g. too few pores)."""


# Describes the local features of a single pore: size, shape and position.
class Pore():
    """Geometric features of one pore contour.

    Args:
        contour: OpenCV contour of the pore; indexed as ``contour[i][0]``
            to obtain the i-th point.
        scale: Image scale in pixels per micron, used to convert pixel
            lengths and areas to microns.

    Attributes are computed once in ``__init__``; ``label`` stays ``None``
    until ``set_label`` is called.
    """

    def __init__(self, contour, scale=1.79173):
        self.contour = contour
        self.scale = scale
        self.area = self.__area()
        self.perimeter = self.__perimeter()
        self.convex_hull = self.__convex_hull()
        self.convexity_defects = self.__convexity_defects()
        self.defect_density = self.__defect_density()
        self.mean_defect = self.__mean_defect()
        self.solidity = self.__solidity()
        self.bounding_box = self.__bounding_box()
        self.x = self.bounding_box[0]
        self.y = self.bounding_box[1]
        self.label = None

    def __area(self):
        """Return the contour area divided by ``scale**2``."""
        return cv.contourArea(self.contour) / (self.scale**2)

    def __perimeter(self):
        """Return the closed contour length divided by ``scale``."""
        return cv.arcLength(self.contour, True) / self.scale

    def __convex_hull(self):
        """Return ``(hull point indices, hull point coordinates)``."""
        hull_indices = cv.convexHull(self.contour, returnPoints=False)  # indices into the contour
        hull_points = cv.convexHull(self.contour)  # coordinates of the hull points
        return hull_indices, hull_points

    def __convexity_defects(self):
        """Return the result of ``cv.convexityDefects`` (may be ``None``)."""
        return cv.convexityDefects(self.contour, self.convex_hull[0])

    def __defect_density(self):
        """Return the number of convexity defects per area, times 100.

        Raises:
            ZeroDivisionError: If the pore area is zero.
        """
        # No defects are reported as None rather than an empty array.
        defects = 0 if self.convexity_defects is None else len(self.convexity_defects)
        return defects / self.area * 100

    def __mean_defect(self):
        """Return the mean convexity-defect depth divided by ``scale``.

        Pores without convexity defects yield 0.
        """
        if self.convexity_defects is None:
            return 0 / self.scale
        defects = self.__defect_size()
        return sum(defects) / len(defects) / self.scale

    def __defect_size(self):
        """Return the depth (in pixels) of every convexity defect.

        The depth is recomputed as the distance of the defect point from
        the line through the hull edge's start and end point. The original
        author's note says the depth reported by OpenCV is too large by a
        constant factor (OpenCV documents it as a fixed-point value).
        """
        if self.convexity_defects is None:
            return []

        defects = np.asarray(self.convexity_defects)[:, 0, :]
        points = np.asarray(self.contour)[:, 0, :].astype(float)
        start = points[defects[:, 0]]
        end = points[defects[:, 1]]
        far = points[defects[:, 2]]

        chord = start - end
        offset = end - far
        # Explicit 2-D cross product; np.cross on 2-D vectors is deprecated.
        cross = chord[:, 0] * offset[:, 1] - chord[:, 1] * offset[:, 0]
        chord_length = np.sqrt(chord[:, 0]**2 + chord[:, 1]**2)
        return list(np.abs(cross) / chord_length)

    def __solidity(self):
        """Return the pore area divided by the area of its convex hull."""
        hull_area = cv.contourArea(self.convex_hull[1]) / self.scale**2
        return self.area / hull_area

    def __bounding_box(self):
        """Return ``cv.boundingRect`` of the contour as ``(x, y, w, h)``."""
        return cv.boundingRect(self.contour)

    def set_label(self, label):
        """Store the cluster label assigned to this pore."""
        self.label = label


# General description of a micrograph: cropping, binarization, scaling and pores.
class MicrographBase():
    """Binarized, centre-cropped micrograph and the pores found in it.

    Args:
        image: Micrograph as an image array (colour or grayscale).
        scale: Image scale in pixels per micron.
        cropsize_microns: Edge length of the square centre crop in microns.
    """

    def __init__(self, image, scale=1.79173, cropsize_microns=2000):
        self.img = image
        self.scl = scale
        self.crpsz = cropsize_microns
        self.bnry = self.__binary()
        self.cnt_crp = self.__center_crop()
        self.cnt_hrrchy = self.__contours_hierarchy()
        self.prs = self.__pores()
        self.img_cnt = self.__img_contour()

    def __binary(self):
        """Return the Otsu-thresholded, Gaussian-blurred grayscale image."""
        try:
            image = cv.cvtColor(self.img, cv.COLOR_RGB2GRAY)
        except cv.error:
            # Conversion fails for images that are already single-channel.
            image = self.img

        img_blur = cv.GaussianBlur(image, (5, 5), 0)
        _, img_binary = cv.threshold(img_blur, 0, 255, cv.THRESH_BINARY + cv.THRESH_OTSU)
        return img_binary

    def __center_crop(self):
        """Return the centre crop framed by a white and then a black 1 px border."""
        height = self.bnry.shape[0]
        width = self.bnry.shape[1]

        w = round(self.crpsz * self.scl)
        h = round(self.crpsz * self.scl)

        x = round(width/2 - w/2)
        y = round(height/2 - h/2)

        binary_img = self.bnry[y:y+h, x:x+w]
        binary_img = cv.copyMakeBorder(binary_img, 1, 1, 1, 1, cv.BORDER_CONSTANT, None, value=255)
        binary_img = cv.copyMakeBorder(binary_img, 1, 1, 1, 1, cv.BORDER_CONSTANT, None, value=0)
        return binary_img

    def __contours_hierarchy(self):
        """Return ``(contours, hierarchy)`` of the cropped image (``RETR_TREE``)."""
        cnt, hrrchy = cv.findContours(self.cnt_crp, cv.RETR_TREE, cv.CHAIN_APPROX_SIMPLE)
        return cnt, hrrchy[0]

    def __pores(self):
        """Return a ``Pore`` for every contour whose parent is contour 0."""
        contours, hierarchy = self.cnt_hrrchy
        # Only contours that have the specimen contour (index 0) as parent are pores.
        return [Pore(cnt, self.scl)
                for idx, cnt in enumerate(contours)
                if hierarchy[idx][3] == 0 and idx != 0]

    def __img_contour(self):
        """Return an RGB copy of the crop with each pore's index drawn next to it."""
        img_cnt = cv.cvtColor(self.cnt_crp, cv.COLOR_GRAY2RGB)
        text_offset = round(0.01*self.crpsz)
        for idx, pore in enumerate(self.prs):
            img_cnt = cv.putText(img_cnt, str(idx), (pore.x, pore.y-text_offset),
                                 cv.FONT_HERSHEY_SIMPLEX, 0.45, 255, 1)
        return img_cnt

    def set_center_crop(self, crop_size_microns=2000):
        """Change the crop size and recompute everything derived from the crop.

        Contours, pores and the annotated image are refreshed so that they
        match the new crop. Statistics computed by subclasses in their own
        ``__init__`` are NOT refreshed here.
        """
        self.crpsz = crop_size_microns
        self.cnt_crp = self.__center_crop()
        # Without this the pores would still describe the previous crop.
        self.cnt_hrrchy = self.__contours_hierarchy()
        self.prs = self.__pores()
        self.img_cnt = self.__img_contour()

    def save_pore_segments(self, path=None):
        """Write the bounding-box crop of every pore to ``<path>/Pores/<i>.jpg``.

        Args:
            path: Target directory; defaults to the current working
                directory at call time.
        """
        if path is None:
            path = os.getcwd()
        # os.path.join instead of a hard-coded backslash keeps this portable.
        path = os.path.join(path, 'Pores')
        os.makedirs(path, exist_ok=True)

        binary_image = self.cnt_crp
        for i, pore in enumerate(self.prs):
            x, y, w, h = (int(value) for value in pore.bounding_box)
            roi = binary_image[y:y+h, x:x+w]
            cv.imwrite(os.path.join(path, str(i)+'.jpg'), roi)

        print('Saved segmented pores. \n Path: {}'.format(path))


# Describes the statistical properties of the pores in a micrograph.
class Micrograph(MicrographBase):
    """Micrograph with global densities and per-feature pore statistics.

    Raises:
        ValueError: If no pores are found (statistics of an empty list).
    """

    def __init__(self, image, scale=1.79173, cropsize_microns=2000):
        super().__init__(image, scale, cropsize_microns)
        self.rltv_dnsty = self.__relative_density()
        self.pr_dnsty = self.__pore_density()
        self.sldty = self.calc_stats([pore.solidity for pore in self.prs])
        self.area = self.calc_stats([pore.area for pore in self.prs])
        self.prmtr = self.calc_stats([pore.perimeter for pore in self.prs])
        self.dfct_dnsty = self.calc_stats([pore.defect_density for pore in self.prs])
        self.mn_dfct = self.calc_stats([pore.mean_defect for pore in self.prs])

    def __relative_density(self):
        """Return 100 minus the pore-area share (in percent) of the crop area."""
        area_mcrgrph = self.crpsz**2
        area_pr = sum(pr.area for pr in self.prs)
        return 100 - area_pr / area_mcrgrph * 100

    def __pore_density(self):
        """Return the number of pores per crop area, times 100."""
        return len(self.prs) / (self.crpsz**2) * 100

    def calc_stats(self, lst):
        """Return descriptive statistics of ``lst`` as a dict.

        Raises:
            ValueError: If ``lst`` is empty.
        """
        if len(lst) == 0:
            raise ValueError('Cannot calculate statistics of an empty list.')

        mean = sum(lst) / len(lst)  # computed once; reused for the variance
        stts = {'Count': len(lst),
                'Unique': len(np.unique(lst)),
                'Max': max(lst),
                'Min': min(lst),
                'Mean': mean,
                'Median': np.median(lst),
                'Std_Dev': np.std(lst),
                'Varianz': sum((elmnt-mean)**2 for elmnt in lst) / len(lst),
                'Skewness': skew(lst)}
        return stts

    def stats(self):
        """Return the per-feature statistics as a DataFrame rounded to 3 decimals."""
        lst = [self.sldty, self.area, self.prmtr, self.dfct_dnsty, self.mn_dfct]
        index = ['Solidity', 'Area', 'Perimeter', 'Defect Density', 'Mean Defect']
        return pd.DataFrame.from_records(lst, index=index).round(decimals=3)


# Describes micrographs and identifies typical and atypical pores.
class MicrographDBSCAN(Micrograph):
    """Micrograph whose pores are clustered with DBSCAN on normalized features.

    Args:
        image, scale, cropsize_microns: See ``MicrographBase``.
        n_neighbors: Neighbour count for the k-distance curve; capped at the
            number of pores.
        min_samples: ``min_samples`` passed to DBSCAN.

    Raises:
        CustomException: If fewer than 5 pores are found.
    """

    def __init__(self, image, scale=1.79173, cropsize_microns=2000, n_neighbors=2, min_samples = 3):
        super().__init__(image, scale, cropsize_microns)
        self.n_neighbors = min(n_neighbors, len(self.prs))
        self.min_samples = min_samples
        self.pore_features = self.__pore_features()
        self.pore_ftrs_nrmlzd = self.__normalization()
        self.knee = self.__calc_knee()
        self.pca = self.__pca()
        self.pores_dbscan = self.__dbscan()

    def __pore_features(self):
        """Return a DataFrame with one row of features per pore."""
        ftrs = {'Area': [pore.area for pore in self.prs],
                'Solidity': [pore.solidity for pore in self.prs],
                'Perimeter': [pore.perimeter for pore in self.prs],
                'Defect_Density': [pore.defect_density for pore in self.prs],
                'Mean_Defect': [pore.mean_defect for pore in self.prs]}
        return pd.DataFrame.from_dict(ftrs)

    def __normalization(self):
        """Return a quantile-transformed copy of ``self.pore_features``."""
        pores_ftrs = self.pore_features.copy()
        cols = pores_ftrs.columns.tolist()

        sclr = QuantileTransformer(n_quantiles=pores_ftrs.shape[0])
        pores_ftrs[cols] = sclr.fit_transform(pores_ftrs[cols].values)
        return pores_ftrs

    def __pca(self):
        """Return ``(cumulative explained variance in %, 3-component projection)``."""
        if len(self.prs) < 5:
            raise CustomException('Less than 5 pores found, unable to calculate stats.')

        pca_ = PCA(n_components=len(self.pore_ftrs_nrmlzd.columns.tolist()), random_state=2020)
        pca_.fit(self.pore_ftrs_nrmlzd)
        explanation = np.cumsum(pca_.explained_variance_ratio_ * 100)

        pca3 = PCA(n_components=3, random_state=2020)
        pca3.fit(self.pore_ftrs_nrmlzd)
        pores_pca3 = pca3.transform(self.pore_ftrs_nrmlzd)
        return explanation, pores_pca3

    def __calc_knee(self):
        """Return the knee of the sorted k-distance curve (DBSCAN epsilon) or ``None``."""
        # finding best epsilon for DBSCAN: https://iopscience.iop.org/article/10.1088/1755-1315/31/1/012012/pdf
        # The number of neighbours should be roughly twice the number of features.
        nbrs = NearestNeighbors(n_neighbors=self.n_neighbors, metric='euclidean').fit(self.pore_ftrs_nrmlzd)
        neigh_dist, _ = nbrs.kneighbors(self.pore_ftrs_nrmlzd)
        sort_neigh_dist = np.sort(neigh_dist, axis=0)

        k_dist = sort_neigh_dist[:, self.n_neighbors-1]
        x = list(range(len(k_dist)))

        kneedle = KneeLocator(x=x, y=k_dist, S=1.0, curve='concave', direction='increasing', online=True)
        return kneedle.knee_y

    def __dbscan(self):
        """Cluster the pores, label them and return ``(n_clusters, n_noise)``."""
        eps = self.knee if self.knee is not None else 0.1
        db = DBSCAN(eps=eps, min_samples=self.min_samples).fit(self.pore_ftrs_nrmlzd)
        labels = db.labels_

        n_outliers = int(np.count_nonzero(labels == -1))
        if n_outliers > 0.1*len(labels):
            # More than 10 % outliers: every pore is put into class 0.
            # NOTE(review): the original comment says only the outliers should
            # be moved to class 0, but the code resets all labels -- confirm.
            labels[:] = 0

        for pore, label in zip(self.prs, labels):  # store the label on every pore
            pore.set_label(label)

        n_clusters = len(set(labels)) - (1 if -1 in labels else 0)
        n_noise = list(labels).count(-1)
        return n_clusters, n_noise

    def explain_pca(self):
        """Print the cumulative variance captured by 1 to 5 principal components."""
        print('The varianz of the pores captured by the principle components: \n 1 principle component: {:.3f} % \n 2 principle components: {:.3f} % \n 3 principle components: {:.3f} % \n 4 principle components: {:.3f} % \n 5 principle components: {:.3f} %'.format(self.pca[0][0], self.pca[0][1],self.pca[0][2], self.pca[0][3], self.pca[0][4]))

    def visualize_clusters(self):
        """Return a 3-D scatter figure of the pores in PCA space, coloured by label."""
        labels = [pore.label for pore in self.prs]
        projection = self.pca[1]

        fig = plt.figure(figsize=(12,8))
        ax = plt.axes(projection="3d")
        ax.scatter3D(projection[:,0], projection[:,1], projection[:,2], c=labels, s=25, alpha=0.6, cmap='viridis')

        plt.title('3D Scatterplot: {:.2f} % of the variability captured'.format(self.pca[0][2]))
        ax.set_xlabel('PC 1', labelpad=15, weight='bold')
        ax.set_ylabel('PC 2', labelpad=10, weight='bold')
        ax.set_zlabel('PC 3', labelpad=10, weight='bold')
        ax.view_init(25, 10)
        return fig

    def hist_pore_features(self, normalized=False, include_outliers=True):
        """Plot histograms of the raw or normalized pore features.

        Args:
            normalized: Use the quantile-transformed features if truthy.
            include_outliers: If falsy, drop pores with a negative label.
        """
        data = self.__normalization() if normalized else self.__pore_features()
        data['Label'] = [pore.label for pore in self.prs]
        if not include_outliers:
            data = data[data['Label'] >= 0]

        data.hist(bins=30, figsize=(8,8))

    def get_corr_plot(self, include_outliers=True):
        """Draw a correlation plot of the pore features (and their label)."""
        data = self.__pore_features()
        data['Label'] = [pore.label for pore in self.prs]
        if not include_outliers:
            data = data[data['Label'] >= 0]

        plt.figure(figsize=(16,8))
        corr = data.corr(numeric_only=True)
        corrplot(corr, size_scale=100, marker="s")

    def get_stats(self, include_outliers=True):
        """Return per-feature statistics, optionally without outlier pores.

        Raises:
            ValueError: If outliers are excluded and no pore remains.
        """
        if include_outliers:
            lst = [self.sldty, self.area, self.prmtr, self.dfct_dnsty, self.mn_dfct]
        else:
            inliers = [pore for pore in self.prs if pore.label >= 0]
            attributes = ['solidity', 'area', 'perimeter', 'defect_density', 'mean_defect']
            lst = [self.calc_stats([getattr(pore, name) for pore in inliers]) for name in attributes]

        index = ['Solidity', 'Area', 'Perimeter', 'Defect Density', 'Mean Defect']
        return pd.DataFrame.from_records(lst, index=index).round(decimals=3)


# Full description of a micrograph: process parameters, global and local features.
class MicrographFullDescription(MicrographDBSCAN):
    """Clustered micrograph combined with its process parameters.

    Args:
        image: Micrograph as an image array.
        laserpower, scanspeed, hatchdistance, layerthickness: Process
            parameters; only their ratios are computed here, units are not
            fixed by this class.
        scale, cropsize_microns, n_neighbors, min_samples: See
            ``MicrographDBSCAN``.
    """

    def __init__(self, image, laserpower, scanspeed, hatchdistance, layerthickness, scale=1.79173, cropsize_microns=2000, n_neighbors=2, min_samples = 3):
        super().__init__(image, scale, cropsize_microns, n_neighbors, min_samples)
        self.laserpower = laserpower
        self.scanspeed = scanspeed
        self.hatchdistance = hatchdistance
        self.layerthickness = layerthickness
        self.global_ftrs = self.__global_ftrs()
        self.global_stts = self.__global_stts()
        self.local_ftrs = self.__local_frts()

    def __global_ftrs(self):
        """Return a one-row DataFrame of process parameters and global features."""
        lst = {'LaserPower': self.laserpower,
               'ScanSpeed': self.scanspeed,
               'HatchDistance': self.hatchdistance,
               'LayerThickness': self.layerthickness,
               'RelativeDensity': self.rltv_dnsty,
               'PoreDensity': self.pr_dnsty,
               'ED': self.laserpower/(self.scanspeed*self.hatchdistance*self.layerthickness),
               'TE': self.laserpower/self.scanspeed,
               'CountOutliers': self.pores_dbscan[1],
               'CountClusters': self.pores_dbscan[0],
               'EpsilonDBSCAN': self.knee}
        return pd.DataFrame(data=lst, index=[0])

    def __local_frts(self):
        """Return the per-pore features with an added ``Label`` column."""
        # Work on a copy so that self.pore_features is not modified in place.
        local_ftrs = self.pore_features.copy()
        local_ftrs['Label'] = [pore.label for pore in self.prs]
        return local_ftrs

    def __global_stts(self):
        """Return a one-row DataFrame with one column per feature statistic."""
        groups = [('Area', self.area),
                  ('Solidity', self.sldty),
                  ('Perimeter', self.prmtr),
                  ('DefectDensity', self.dfct_dnsty),
                  ('MeanDefect', self.mn_dfct)]
        keys = list(self.area.keys())

        columns = {}
        for prefix, stts in groups:
            for key in keys:
                columns[prefix+key] = [stts[key]]
        return pd.DataFrame.from_dict(columns)

    def get_full_description(self, include_stats=True, include_pore_features=True):
        """Return local features, global features and statistics side by side.

        Columns that contain any missing value after concatenation (the
        one-row global frames next to many pore rows) are filled with their
        first-row value.
        """
        frames = []
        if include_pore_features:
            frames.append(self.local_ftrs)
        frames.append(self.global_ftrs)
        if include_stats:
            frames.append(self.global_stts)
        data = pd.concat(frames, axis=1)

        nans = data.isna().any().tolist()
        for col, has_nan in zip(data.columns.tolist(), nans):
            if has_nan:
                data[col] = data[col].iloc[0]

        return data