diff --git a/statusio-selector.js b/statusio-selector.js index 3ed50f8..c4dbdcf 100644 --- a/statusio-selector.js +++ b/statusio-selector.js @@ -1,14 +1,13 @@ // ==UserScript== -// @name MS AKS Tool Launcher -// @namespace https://microsoft.com -// @description Effort to simplify launching AKS support-related tools +// @name Status.io Selector +// @namespace https://status.io +// @description Effort to simplify configuration of incidents/maintenance windows // @require http://ajax.googleapis.com/ajax/libs/jquery/1.6.2/jquery.min.js // @require http://ajax.googleapis.com/ajax/libs/jqueryui/1.11.1/jquery-ui.min.js // @require https://gist.github.com/raw/2625891/waitForKeyElements.js // @grant GM_addStyle // @grant GM_setClipboard -// @include /^https?://servicedesk\.microsoft\.com/.*$/ -// @include /^https?://servicedesk/.*$/ +// @include /^https?://app\.status\.io/dashboard/.*$/ // @version 0.0.1 // ==/UserScript== @@ -32,145 +31,8 @@ if (urlRegex.test($("ul").context.URL)) { const resourceGroup = resourceURI.split('/')[4]; const cluster = resourceURI.split('/')[8]; var template = ` -// Here’s are ALL the errors/messages for the AKS clusters in the resource group in the past 90 days - -union cluster("Aks").database("AKSprod").FrontEndContextActivity, cluster("Aks").database("AKSprod").AsyncContextActivity -| where subscriptionID contains "${subscription}" -| where resourceName contains "${cluster}" -| where level != "info" -| where PreciseTimeStamp > ago(90d) -| project PreciseTimeStamp, operationID, correlationID, level, suboperationName, msg +farts -// Here’s are the recent scale/upgrade operations – -union cluster("Aks").database("AKSprod").FrontEndContextActivity, cluster("Aks").database("AKSprod").AsyncContextActivity -| where subscriptionID contains "${subscription}" -| where resourceName contains "${cluster}" -| where msg contains "intent" or msg contains "Upgrading" or msg contains "Successfully upgraded cluster" or msg contains "Operation succeeded" or msg contains "validateAndUpdateOrchestratorProfile" // or msg contains "unique pods in running state" -| where PreciseTimeStamp > ago(90d) -| project PreciseTimeStamp, operationID, correlationID, level, suboperationName, msg - -// Shows the scale errors/messages for an AKS cluster using the operationID from the previous query -union cluster("Aks").database("AKSprod").FrontEndContextActivity, cluster("Aks").database("AKSprod").AsyncContextActivity -| where operationID == "" -| where level != "info" -| project PreciseTimeStamp, level, msg - -//Black box monitoring FIND fqdn by customer"s subscriptionID -cluster("aks").database("AKSprod").BlackboxMonitoringActivity -| where subscriptionID == "${subscription}" and resourceGroupName contains "${resourceGroup}" -| where PreciseTimeStamp > ago(9d) -| summarize by fqdn, resourceGroupName, resourceName, underlayName - -//Black box monitoring using fqdn to find where cluster is not healthy -cluster("aks").database("AKSprod").BlackboxMonitoringActivity -| where fqdn == "replacefqdn" -// | where (["state"] != "Healthy" or podsState != "Healthy" or resourceState != "Healthy" or addonPodsState != "Healthy") -| where PreciseTimeStamp > ago(20d) -| project fqdn, PreciseTimeStamp, agentNodeName, state, reason, podsState, resourceState, addonPodsState, agentNodeCount, provisioningState, msg, resourceGroupName, resourceName, underlayName -// | order by PreciseTimeStamp asc -// | render timepivot by fqdn, reason, agentNodeName, addonPodsState -| render timepivot by fqdn, agentNodeName, addonPodsState, reason -// | summarize count() by reason -// | sort by reason - -//Black box monitoring for cluster -cluster("aks").database("AKSprod").BlackboxMonitoringActivity -| where PreciseTimeStamp > ago(12h) and underlayName == "" -| where reason != "" -| summarize count() by reason | top 10 by count_ desc - -// Find Errors reported by ARM Failed - Deleted - Created -cluster("ARMProd").database("ARMProd").EventServiceEntries -| where subscriptionId == "${subscription}" -| where resourceUri contains "${cluster}" -| where TIMESTAMP > ago(3d) -| where status == "Failed" -| project PreciseTimeStamp, correlationId , operationId, operationName, properties - -// Get serviceRequestId of processes sent to cluster -cluster("ARMProd").database("ARMProd").HttpOutgoingRequests -| where httpMethod != "GET" -| where TIMESTAMP > ago(1d) -| where targetUri contains "${cluster}"// and targetUri contains "${subscription}" -| project TIMESTAMP, ActivityId, serviceRequestId , clientRequestId, failureCause, httpMethod , operationName, targetUri - -// Use the activityID from the previous query. -cluster("Azcrp").database("crp_allprod").ContextActivity -| where TIMESTAMP between (datetime(2018-08-17T07:57Z)..datetime(2018-08-17T09:28Z)) -| where subscriptionId == "${subscription}" -// | where activityId == "3817a3d4-7045-4db5-bc7f-45dbffe2166a" -// | where message contains "${cluster}" -// | where PreciseTimeStamp > ago(3d) // datetime(2018-07-31) -| project PreciseTimeStamp, activityId, traceLevel, message - -// claims name shows WHO requested or performed the action -cluster("ARMProd").database("ARMProd").EventServiceEntries -| where subscriptionId == "${subscription}" -| where resourceUri contains "${cluster}" -// | where claims contains "1d78a85d-813d-46f0-b496-dd72f50a3ec0" -// | where ActivityId == "3817a3d4-7045-4db5-bc7f-45dbffe2166a" -// | where operationName contains "delete" -| where TIMESTAMP between (datetime(2018-08-17T07:57Z)..datetime(2018-08-17T09:28Z)) -// | where claims contains "baead28c-2ce7-4550-83a5-5e6a2deb02b8" -// | where status == "Failed" -| project PreciseTimeStamp, claims, authorization, properties, resourceUri, operationName //, httpRequest, correlationId, operationId, Deployment, operationName -// | project PreciseTimeStamp, resourceUri , issuer, issuedAt - -// Get the PUT operation. This query also shows the command used (aks get-credentials, browse, scale, show, create) -cluster("Armprod").database("ARMProd").HttpIncomingRequests -| where subscriptionId == "${subscription}" -| where targetUri contains "${cluster}" -// | where authorizationAction contains "write" or authorizationAction contains "delete" -| where commandName contains "aks" and httpMethod == "PUT" -| where PreciseTimeStamp > ago(3d) -| project TIMESTAMP,httpMethod,commandName,failureCause,serviceRequestId,authorizationAction,errorCode,errorMessage,subscriptionId,correlationId,targetUri - -// Get the PUT operation -cluster("Armprod").database("ARMProd").HttpIncomingRequests -| where subscriptionId == "${subscription}" -| where targetUri contains "${cluster}" and authorizationAction contains "Clusters" -| where httpMethod == "PUT" -| where PreciseTimeStamp > ago(3d) // between (datetime(2018-07-16) .. datetime(2018-07-20)) -| project TIMESTAMP, commandName , serviceRequestId , httpMethod , authorizationAction , operationName - -// -cluster("Aks").database("AKSprod").FrontEndQoSEvents -| where subscriptionID contains "${subscription}" -| where resourceName contains "${cluster}" -// | where operationName !contains "delete" -| where PreciseTimeStamp > ago(3d) -// feature-gates will be broken when upgrading to 1.11.0+ Please code all cases against - 2835281 - for this issue (also added in the wiki) -// Chase has created doc for scenario where upgrade failing to 1.11 and nodes getting disappear. Here is the doc: https://www.csssupportwiki.com/index.php/curated:Azure/Virtual_Machine/Products/Azure_Kubernetes_Service/TSG/upgrade_to_1.11_NodesNotReady -// Customer can run this script on nodes which are missing -// az vm run-command invoke -g $MC_RG_NAME -n $NODE_NAME --command-id RunShellScript --scripts "sed -i "s/--feature-gates=Accelerators=true //" /etc/default/kubelet && systemctl daemon-reload && systemctl restart kubelet" -// But a roll is being fixed out , once fix has been rolled out they can retry the upgrade - -cluster("Aks").database("AKSprod").AsyncQoSEvents -| where subscriptionID == "${subscription}" -| where TIMESTAMP > ago(3d) -| where suboperationName == "Upgrading" and propertiesBag contains "1.11" -| extend bag = parse_json(propertiesBag) -| extend from_version = tostring(bag.k8sCurrentVersion) -| extend to_version = tostring(bag.k8sGoalVersion) -| where from_version !contains "1.11" and to_version contains "1.11" and resultCode == "NodesNotReady" - -//Black box monitoring for cluster -cluster("aks").database("AKSprod").BlackboxMonitoringActivity -| where PreciseTimeStamp > ago(1d) -| where fqdn contains "replacefqdn" -// | where ["state"] == "Unhealthy" -| summarize count(state) by bin(PreciseTimeStamp, 5min), state -| render timeline - -//Black box monitoring for cluster -cluster("aks").database("AKSprod").BlackboxMonitoringActivity -| where PreciseTimeStamp > ago(1d) -| where fqdn contains "replacefqdn" -| where state != "Healthy" -| project PreciseTimeStamp, state, provisioningState, reason, agentNodeCount, msg, resourceGroupName, resourceName, underlayName -| order by PreciseTimeStamp asc -// | render timeline - -cluster("Aks").database("AKSprod").AsyncQoSEvents | sample 10 `; const regionRegex = /- Location:.*/g; const regionlower = caseText.match(regionRegex)[0].split(' ')[2]; @@ -178,75 +40,6 @@ cluster("Aks").database("AKSprod").AsyncQoSEvents | sample 10 case 'australiaeast': region = "AustraliaEast"; break; - case 'brazilsouth': - region = "BrazilSouth"; - break; - case 'canadacentral': - region = "CanadaCentral"; - break; - case 'canadaeast': - region = "CanadaEast"; - break; - case 'centralindia': - region = "CentralIndia"; - break; - case 'centralus': - region = "CentralUS"; - break; - case 'eastasia': - region = "EastAsia"; - break; - case 'eastus': - region = "EastUS"; - break; - case 'eastus2': - region = "EastUs2"; - break; - case 'eastus2euap': - region = "EastUS2EUAP"; - break; - case 'francecentral': - region = "FranceCentral"; - break; - case 'japaneast': - region = "JapanEast"; - break; - case 'japanwest': - region = "JapanWest"; - break; - case 'northcentralus': - region = "NorthCentralUS"; - break; - case 'northeurope': - region = "NorthEurope"; - break; - case 'southafricanorth': - region = "SouthAfricaNorth"; - break; - case 'southcentralus': - region = "SouthCentralUS"; - break; - case 'southeastasia': - region = "SouthEastAsia"; - break; - case 'southindia': - region = "SouthIndia"; - break; - case 'uksouth': - region = "UkSouth"; - break; - case 'ukwest': - region = "UKWest"; - break; - case 'westcentralus': - region = "WestCentlraUS"; - break; - case 'westeurope': - region = "WestEurope"; - break; - case 'westus': - region = "WestUs"; - break; case 'westus2': region = "WestUs2"; break;