#!/usr/bin/env python

###############################################################################
#
# Copyright 2017 - 2019 NXP
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
#
###############################################################################

# DESCRIPTION:		  Read the Healthy and Taped data from all sheets and
#					  apply k-means clustering to it to obtain the trained data
# INPUT/S:			  Data_Collection_<time stamp>.xls
# OUTPUT/S:			  Centroid value

import tensorflow as tf
import pandas as pd
from pandas import DataFrame
import numpy as np
from xlwt import Workbook
import os
import sys

###################################################################
# General Configurations
###################################################################
# Moving-average results. Calculate_AVG appends to this array and the
# main script empties it again for every axis it processes.
np_array_avgs = np.array([], dtype=np.float64)

# Column names looked up in the merged sheets; the main script runs one
# clustering pass per name.
Sample_Value = ["X", "Y", "Z"]


###################################################################
# Class for the Excel operation
###################################################################	
class WriteExcel(object):
	"""Small convenience wrapper around an xlwt ``Workbook``.

	The instance keeps one workbook and one "current" sheet. The expected
	call order is ``Create_Excel`` -> ``Write_Excel`` (any number of times)
	-> ``Close_Excel``.
	"""

	def __init__(self,ExcelName):
		"""Create an empty workbook that will later be saved as ``ExcelName``."""
		# Name handed to Workbook.save() by Close_Excel
		self.Excel = ExcelName
		self.wb = Workbook()
		# Stays None until Create_Excel adds a sheet
		self.sheet = None

	def Create_Excel(self,SheetName):
		"""Add a sheet called ``SheetName`` and make it the current sheet."""
		new_sheet = self.wb.add_sheet(SheetName)
		self.sheet = new_sheet

	def Write_Excel(self,raw,clm,value):
		"""Write ``value`` to the current sheet at row ``raw``, column ``clm``."""
		current = self.sheet
		current.write(raw, clm, value)

	def Close_Excel(self):
		"""Save the workbook under the name given to the constructor."""
		target = self.Excel
		self.wb.save(target)

###################################################################
# Function to find the average
###################################################################	
def Calculate_AVG(df, window=5):
	"""Append the trailing moving average of ``df`` to the global ``np_array_avgs``.

	For every position ``i >= window - 1`` the samples at positions
	``i - window + 1 .. i`` are summed and divided by ``window``.

	Each sample is truncated with ``int()`` before it is used. A sample that
	truncates to zero, or that cannot be converted to an integer at all,
	contributes nothing to the sum and produces no average of its own, but it
	still occupies its position so that the window keeps sliding. Previously
	such a sample left the running sum out of step with the window for the
	rest of the data.

	Parameters:
		df:     object exposing ``shape[0]`` whose items ``df[i]`` are scalars
		        or single-element NumPy arrays (e.g. an ``(n, 1)`` column)
		window: number of positions per average, default 5

	Raises:
		ValueError: if ``window`` is smaller than 1

	Returns nothing; the averages are appended to ``np_array_avgs``.
	"""
	global np_array_avgs

	if window < 1:
		raise ValueError("window must be at least 1")

	running_sum = 0
	contributions = []	# what each position actually added to running_sum
	averages = []

	for i in range(df.shape[0]):
		try:
			sample = df[i]
			# Unwrap single-element arrays explicitly: calling int() on an
			# array with ndim > 0 is deprecated in recent NumPy releases.
			if isinstance(sample, np.ndarray):
				sample = sample.item()
			value = int(sample)
		except (TypeError, ValueError, OverflowError):
			# Non-numeric, NaN/inf or multi-element sample: treat as missing
			value = 0

		contributions.append(value)
		running_sum += value
		if i >= window:
			# Drop the position that just left the window
			running_sum -= contributions[i - window]

		# Missing/zero samples never emit an average themselves
		if value and i >= window - 1:
			averages.append(running_sum / window)

	if averages:
		np_array_avgs = np.append(np_array_avgs, averages)


###################################################################
# Function to load the training data
###################################################################
def Load_DataBase(path):
	"""Return the merged sheets of a ``.xls`` workbook found below ``path``.

	The directory tree is walked with ``os.walk``; when several ``.xls`` files
	exist, the one visited last is used (same as the previous inline code,
	which overwrote its result for every file). All sheets of that workbook
	are concatenated row-wise into one DataFrame.

	Returns None when no ``.xls`` file is found.
	"""
	DataBase = None
	for root, _dirs, files in os.walk(path):
		for file_name in files:
			if file_name.endswith(".xls"):
				DataBase = os.path.join(root, file_name)

	if DataBase is None:
		return None

	with pd.ExcelFile(DataBase) as xls:
		sheets = [xls.parse(sheet_name) for sheet_name in xls.sheet_names]
	return pd.concat(sheets, axis=0)


###################################################################
# Function to find the RMS values (Feature Extraction)
###################################################################
def Calculate_RMS(averages, no=10):
	"""Return the RMS of consecutive windows of ``averages`` as a 1-D array.

	A window is closed at every non-zero index that is a multiple of ``no``,
	so the first window covers indices ``0 .. no`` (``no + 1`` values) and
	every later window covers the next ``no`` values. Values after the last
	closed window are dropped.

	Only the values collected since the previous window are used. The former
	fixed-size buffer kept the last value of the first window in its spare
	slot and mixed it into every later RMS.

	Raises:
		ValueError: if ``no`` is smaller than 1
	"""
	if no < 1:
		raise ValueError("no must be at least 1")

	rms_values = []
	window = []
	for i, value in enumerate(averages):
		window.append(value)
		# Value comparison; the former "is 0" identity tests only worked
		# because of small-integer caching.
		if i != 0 and i % no == 0:
			rms_values.append(np.sqrt(np.mean(np.square(window))))
			window = []
	return np.array(rms_values)


###################################################################
# Main function
###################################################################
if __name__ == "__main__":

	####################################################################
	# Parse the excel data base
	####################################################################
	print ("="*73)
	print ("="*32," Start ","="*32)
	print ("="*73)

	# Only a missing/unreachable directory is reported with this message.
	# Any other failure now surfaces with its own traceback instead of being
	# mislabelled, and error exits use a non-zero status.
	try:
		os.chdir("../DataBase")
	except OSError:
		print ("\n*** DataBase directory is not existing ***")
		sys.exit(1)

	train = Load_DataBase(os.getcwd())
	if train is None:
		print ("\n*** No .xls file found in the DataBase directory ***")
		sys.exit(1)

	clusters_n = 3
	iteration_n = 100

	for sample in Sample_Value:
		df = DataFrame(train[sample],columns = [sample])
		df = np.nan_to_num(df)

		####################################################################
		# Find the Moving Average (Filtering the Data)
		####################################################################
		# Calculate_AVG appends to the module-level array, so start empty
		np_array_avgs = np.array([])
		Calculate_AVG(df)

		####################################################################
		# Find the RMS values of the filtered data (Feature Extraction)
		####################################################################
		np_array_rms = Calculate_RMS(np_array_avgs, 10)

		#####################################################################
		# Apply K-means clustering algorithm after feature extraction
		#####################################################################
		# One feature per row; values are truncated to int32 before clustering
		np_array_rms = np_array_rms.reshape([-1,1])
		shape_ = np_array_rms.shape[0]
		np_array_rms = np.int32(np_array_rms)

		# The initial centroids are sliced out of the shuffled points, so at
		# least clusters_n feature rows are required.
		if shape_ < clusters_n:
			print ("\n*** Not enough data to cluster the", sample, "axis ***")
			sys.exit(1)

		# NOTE(review): the calls below (tf.Session, tf.assign,
		# tf.random_shuffle, reduction_indices) are TensorFlow 1.x style;
		# confirm the installed TensorFlow version before upgrading.

		# Convert the input array to tensor
		points = tf.convert_to_tensor(np_array_rms, dtype=tf.int32)
		points = tf.reshape(points,[shape_,-1])

		# Randomly select the centroids initially for the first iteration
		centroids = tf.Variable(tf.slice(tf.random_shuffle(np_array_rms), [0, 0], [clusters_n,-1]))

		# Add a dimension for centroid and points to make subtraction operation possible
		points_expanded = tf.expand_dims(points, 0)
		centroids_expanded = tf.expand_dims(centroids, 1)
		sub = tf.subtract(points_expanded,centroids_expanded)
		square = tf.square(sub)
		distances = tf.reduce_sum(square,2)
		# Index of the nearest centroid for every point
		assignments = tf.argmin(distances, 0)

		# New centroid of a cluster = mean of the points assigned to it
		means = []
		for c in range(clusters_n):
			means.append(tf.reduce_mean(
				tf.gather(np_array_rms,
					tf.reshape(
						tf.where(
							tf.equal(assignments, c)
						),[1,-1])
				),reduction_indices=[1]))
		new_centroids = tf.concat(means, 0)
		update_centroids = tf.assign(centroids, new_centroids)

		init = tf.global_variables_initializer()

		with tf.Session() as sess:
			sess.run(init)
			for step in range(iteration_n):
				[_,_,_,_, centroid_values, points_values, assignment_values] = sess.run([sub,square,distances,update_centroids, centroids, points, assignments])
			print ("-"*30)
			print("Centroids for ",sample,"axis\n")
			print(centroid_values)
			print ("-"*30)

	print ("="*73)
	print ("="*33," End ","="*33)
	print ("="*73)
	
