diff --git a/equinix/api-team/incidents/salvage-license-costs.org b/equinix/api-team/incidents/salvage-license-costs.org new file mode 100644 index 0000000..47283a8 --- /dev/null +++ b/equinix/api-team/incidents/salvage-license-costs.org @@ -0,0 +1,328 @@ +#+TITLE: Incident 2590 +#+AUTHOR: Adam Mohammed +#+DATE: May 2, 2024 + + +* Starting out + +There are CPUs missing their ProcessorComponent information. + +** Get a list of affected hardware + +#+BEGIN_SRC ruby +affected_servers = [] +Hardware::Server.find_in_batches do |hbatch| + hbatch.each do |h| + affected_servers << h unless h.components.any? { |c| c.type == "ProcessorComponent" } + end +end +#+END_SRC +#+DATE: +#+BEGIN_EXAMPLE +1685 total affected +#+END_EXAMPLE + +** Classify the affected hardware by class and plan + +#+BEGIN_SRC ruby +affected_server_types = Hash.new(0) +affected_servers.each do |h| + affected_server_types[h.class] += 1 +end +#+END_SRC + +#+BEGIN_EXAMPLE ruby +irb(main):269:0> affected_server_types +=> {"Hardware::StorageAppliance"=>170, "Hardware::Open19Node"=>195, "Hardware::Server"=>1319, "Hardware::StorageServer"=>1} +#+END_EXAMPLE + +#+BEGIN_SRC ruby +affected_plan_types = Hash.new(0) +affected_servers.each do |h| + next unless h.plan.present? 
+ affected_plan_types[h.plan.slug.to_s] += 1 +end; nil + +#+END_SRC + +#+BEGIN_EXAMPLE ruby +{"storage.custom"=>102, + "m3.large.x86"=>329, + "c3.small.x86"=>120, + "m3.small.x86"=>143, + "n2.xlarge.x86"=>23, + "c2.medium.x86"=>124, + "c3.medium.x86"=>396, + "netapp.storage"=>16, + "m2.xlarge.x86"=>31, + "nvidia3.a100.medium"=>1, + "t3.small.x86"=>13, + "n3.xlarge.x86"=>155, + "w3amd.75xx24c.512.8160"=>102, + "s3.xlarge.x86"=>29, + "appliance.dell.hci.vxrail.opt-m.x86"=>12, + "m3.large.opt-c2"=>3, + "nvidia3.a30.medium"=>11, + "purestorage"=>6, + "a3.large.opt-s4a5n1.x86"=>17, + "nvidia3.a30.large"=>3, + "n3.xlarge.opt-m4"=>4, + "storage.dell"=>14, + "nvidia3.a40.medium"=>9, + "w3amd.7402p.256.8160"=>1, + "a4.lg"=>5, + "a3.large.x86"=>1, + "x.large.arm"=>1, + "w3amd.75xx24c.256.4320"=>1, + "npi.testing"=>1, + "m3.large.opt-c2m4"=>1, + "a3.large.opt-s4a1"=>1, + "w3amd.75xx24c.256.8160"=>1, + "c3.large.arm64"=>2} +#+END_EXAMPLE + + +** What hardware is missing plan information + +#+BEGIN_SRC ruby +missing_plan = [] +affected_servers.each do |h| + missing_plan << h unless h.plan.present? +end; nil +#+END_SRC + +#+BEGIN_EXAMPLE ruby +irb(main):289:0> missing_plan.pluck(:id, :type, :state) +=> +[["2556229f-3da0-4056-96dc-ce820af30ba3", "Hardware::Server", "enrolled"], + ["4ca367f4-33c2-494f-8227-bed6c0d8bd8d", "Hardware::Server", "enrolled"], + ["8504ffdf-24d7-453f-9a49-94a7cba3f9ae", "Hardware::StorageAppliance", "enrolled"], + ["8b383a51-2a45-4d02-aafa-f31b159e31b6", "Hardware::Server", "enrolled"], + ["a20a6442-7185-4c49-bfcf-5359fe22cd9f", "Hardware::StorageAppliance", "enrolled"], + ["e2ff6fec-a70a-42e6-afb1-93f57c6a30f1", "Hardware::Server", "enrolled"], + ["f9670617-0cde-4db6-94de-d7ec495881e7", "Hardware::StorageAppliance", "enrolled"]] +#+END_EXAMPLE + + +I think it's safe to not worry about these because customers can't deploy them yet. + +** What hardware plan versions don't have the required CPU information? 
+ +#+BEGIN_SRC ruby + def valid_cpu_data?(hardware) + required_keys = ["cores", "count"] + return false unless hardware.plan_version.present? && hardware.plan_version.specs["cpus"].present? + cpu_data = hardware.plan_version.specs["cpus"][0] + required_keys.map do |k| + cpu_data.keys.include? k + end.all? + end + + affected_plan_versions = Hash.new(0) + affected_servers.each do |h| + next unless h.plan_version.present? + affected_plan_versions[h.plan_version.slug] += 1 unless valid_cpu_data?(h) + end; nil +#+END_SRC + + + +** These are the ones that are currently not being billed properly + +#+BEGIN_SRC ruby + broken_billables = [ + "39b7f377-af6d-437b-a99b-10d9d4fd7b53", + "d2deb4c8-446f-4679-a7f5-60edf7745e23", + "e9c50e27-9f74-477b-9210-0e277537a336", + "88a6bf4a-b63e-4c7e-8c20-5d5949ba62f9", + "5b205c53-af64-421e-b2b6-39f5923d4f3f", + "604c38d9-1f8c-4600-bb29-a0d5e1aa504a", + "d4914c80-c657-4ff2-86a1-8f41d90af0a9", + "f6f087f3-3e7c-457f-8943-a6864a8a0b97", + "88d2e8ee-6ec1-450b-9982-63d8220a1011", + "a47f38f9-c2ac-46ba-bb16-68e659b89183", + "e47a3d2e-13a0-444a-8164-ebe54fbc43b1", + "840ce4fd-a300-4a7b-96a3-140e0bf988b4", + "68e0feb1-8146-4b08-a591-15806a0f61a0", + "0e1ef1c6-2de7-40b3-91e0-44474f32fafb", + "161d4f10-4362-4028-b237-b7649f87eb09" + ] +#+END_SRC + + +** Do these pieces of hardware have the information I need to fix the data? + +#+BEGIN_SRC ruby + def my_valid_cpu_data?(hardware) + required_keys = ["cores", "count"] + return false unless hardware.plan_version.present? && hardware.plan_version.specs["cpus"].present? + cpu_data = hardware.plan_version.specs["cpus"][0] + required_keys.map do |k| + cpu_data.keys.include? k + end.all? + end + + can_be_fixed = [] + + broken_hardware.each do |h| + next unless h.plan_version.present? 
+ can_be_fixed << h if my_valid_cpu_data?(h) + end; nil +#+END_SRC + +** Actually fix the components + +#+BEGIN_SRC ruby + def create_processor_component(h_id, cpu_data, index) + cpu = ProcessorComponent.new + cpu.name = cpu_data["name"] + cpu.type = ProcessorComponent.to_s + cpu.vendor = cpu_data["manufacturer"] + cpu.model = cpu_data["model"] + cpu.serial = "CPU#{index}" + cpu.firmware_version = "N/A" + cpu.data = { + "clock" => cpu_data["speed"], + "cores" => cpu_data["cores"], + } + cpu.hardware_id = h_id + cpu + end +#+END_SRC + +#+BEGIN_SRC ruby + cant_fix = [] + finished = [] + broken_hardware.each_with_index do |h, i| + unless h.plan_version.present? && h.plan_version["cpus"].present? + cant_fix << h + next + end + + cpu_data = h.plan_version["cpus"][0] + core_count = h + c = create_processor_component(h.id, , +#+END_SRC + +#+BEGIN_SRC ruby + "04af7a5f-6330-4095-b525-ea8a596db035" + "111fc3d1-7002-4c22-9d29-e2539c610bb1" + "15a4071c-ddd9-4fc5-b9b9-35d5831a9de3" + "19798268-39ca-454e-a7de-cab1a9cae4a5" + "1df18ad3-3189-4b87-9654-7d9b062d553d" + "20388df4-c645-445c-8563-114213c85604" + "2cafd1cc-a6ba-4caf-849d-969ac22eddca" + "2cc2596e-8045-49ea-8274-5b84e27a643c" + "2d4941a3-f0ce-454c-b9dc-6f5bf3381519" + "2e13125c-9794-4392-ab7c-0dbb10b3b4f7" + "2e24c7dc-a219-45c2-ae79-1aa0eb367d56" + "2ffa9123-6466-49a3-ac81-84a7e0dcb437" + "35d423fa-e119-4c9b-8eed-9193a4037b18" + "39888ace-88cb-49f8-8eef-f1ec14c36d2c" + "4470e1bc-0c1e-47ac-99e0-8f23cc075228" + "56c91002-4e8e-4ab8-b653-d8fb459ad186" + "59daefde-f2c2-42c2-8bc9-90d5a00e98e9" + "5bf121bf-1b11-429b-9f73-11206e9f438c" + "5f81d1f6-9c7d-41b0-bb02-a4cb5b31b1ab" + "613d4464-8c0b-44a8-8bcc-9ece50b17ce5" + "62e344ed-2fe1-4778-92e8-0dd386cf0590" + "630cf74d-d689-496c-b29f-5f094c4455d5" + "649fa2b1-675c-4433-9256-e7632092ab8a" + "66f1ef27-3310-40c3-8d06-6c889ddc1e15" + "6ac54a10-c47d-446d-8ef5-d4131bdc746c" + "6c7e5828-68fe-4114-a8e8-1e3ce9747de0" + "773240e6-7f9b-472f-847e-0a9f914e4493" + 
"77f9ba1e-bcd4-46c9-963a-b861fb573ab2" + "7fe941fd-4533-411e-93ad-832632910cf2" + "858a0e53-56ec-4b77-b852-8371f3ead1bd" + "85b1ab1b-664b-4d0f-855e-30ccf7f16f50" + "921d04e8-b7b8-4e13-a9f3-f55302d970c1" + "9430eb5e-fbe2-48b0-b180-d94347a5f296" + "a172606f-4d90-41b8-a1f1-0cd1b20aaa7f" + "a8f5d150-0f5b-4a92-9583-0e70473a9b8b" + "a96d685d-b16f-4852-84dd-dd3304b37471" + "aa1f836f-5808-452a-a5bc-884acd3bcd90" + "abc678fd-d92b-4fc9-ad46-bc6316c170c6" + "afce0857-1016-4638-91bf-f67ee9ade423" + "b37bae11-645f-45cf-b55e-20604b5f3030" + "d263768d-c460-4bdd-81fa-c04fe80122cc" + "d4849d7e-8b68-4f14-97f6-0682c20d4706" + "d634a3b5-98ef-4eca-8fe3-3bc4903170c9" + "dff6b6b3-d46e-47c8-8c85-e85f2566893b" + "e248f2f0-b1dc-4e6e-b025-687ea375fe2d" + "e9e17d57-f8dd-4f8a-b31b-6e33c8e25078" + "fa97834e-d71e-4d8f-8fc0-2e8988a05a28" + "fb6c21a5-8640-4e1e-af18-2790f3a79873" +#+END_SRC + + +1. LicenseActivationID +2. Licensable (an Instance Model) +3. PlanVersion +4. CPU count and CPU cores +5. Update License.data["cores"] = cpu_count * cpu_cores + + +#+BEGIN_SRC ruby + # this doesn't save the things + def fix_core_count_prime(license_activation) + instance = license_activation.licensable + return "missing instance" unless instance.present? + plan_version = instance.plan_version + return "missing plan_version" unless plan_version.present? + + cpu_data = plan_version.specs["cpus"][0] + return "missing cpu_data" unless cpu_data.present? && cpu_data["cores"] && cpu_data["count"] + cpu_count = plan_version.specs["cpus"][0]["count"].to_i + cpu_cores = plan_version.specs["cpus"][0]["cores"].to_i + + license = license_activation.license + license.data["cores"] = cpu_count * cpu_cores + + license + end + + + res = broken_license_activations[2..].map do |la_id| + la = LicenseActivation.with_deleted.find(la_id) + return "couldn't find la #{la_id}" unless la.present? + fix_core_count_prime(la) + end + + # when I wanted to get the successful ones + res.filter { |item| item.is_a? 
License } + + # when I wanted to see what broke + res.filter { |item| !item.is_a? License } + +#+END_SRC + + +** Are there any Windows licenses remaining with 0 cores that aren't erroring yet? + + +#+BEGIN_SRC ruby + activations = LicenseActivation.eager_load(:license).eager_load(:licensee_product).where("licensee_products.slug LIKE '%windows%'").all + + missing_cores = activations.map do |la| + if la.license.data["cores"] == 0 + la + else + nil + end + end.compact + + + fixed_licenses = missing_cores.map do |la| + fix_core_count_prime(la) + end + + +#+END_SRC + +#+BEGIN_SRC ruby + irb(main):066:0> LicenseActivation.eager_load(:license).eager_load(:licensee_product).where("licensee_products.slug LIKE '%windows%'").where("(licenses.data->>'cores')::integer = 0").count + => 0 + irb(main):067:0> LicenseActivation.eager_load(:license).eager_load(:licensee_product).where("licensee_products.slug LIKE '%windows%'").where("(licenses.data->>'cores')::integer > 0").count + => 538 +#+END_SRC