Files
org-notes/equinix/api-team/incidents/salvage-license-costs.org

9.5 KiB

Incident 2590

Starting out

There are CPUs missing their ProcessorComponent information.

Get a list of affected hardware

# Collect every record reachable through Hardware::Server that has no
# component whose type is "ProcessorComponent".
affected_servers = []
Hardware::Server.find_each do |server|
  has_cpu = server.components.any? { |component| component.type == "ProcessorComponent" }
  affected_servers << server unless has_cpu
end
1685 total affected

Classify the affected hardware by class and plan

# Count the affected hardware per class; unseen classes read as 0.
affected_server_types = affected_servers.each_with_object(Hash.new(0)) do |server, counts|
  counts[server.class] += 1
end
irb(main):269:0> affected_server_types
=> {"Hardware::StorageAppliance"=>170, "Hardware::Open19Node"=>195, "Hardware::Server"=>1319, "Hardware::StorageServer"=>1}
# Count the affected hardware per plan slug; hardware without a plan is skipped.
# The trailing `; nil` keeps the console from echoing the whole collection.
affected_plan_types = Hash.new(0)
affected_servers.each do |server|
  plan = server.plan
  affected_plan_types[plan.slug.to_s] += 1 if plan.present?
end; nil
{"storage.custom"=>102,
 "m3.large.x86"=>329,
 "c3.small.x86"=>120,
 "m3.small.x86"=>143,
 "n2.xlarge.x86"=>23,
 "c2.medium.x86"=>124,
 "c3.medium.x86"=>396,
 "netapp.storage"=>16,
 "m2.xlarge.x86"=>31,
 "nvidia3.a100.medium"=>1,
 "t3.small.x86"=>13,
 "n3.xlarge.x86"=>155,
 "w3amd.75xx24c.512.8160"=>102,
 "s3.xlarge.x86"=>29,
 "appliance.dell.hci.vxrail.opt-m.x86"=>12,
 "m3.large.opt-c2"=>3,
 "nvidia3.a30.medium"=>11,
 "purestorage"=>6,
 "a3.large.opt-s4a5n1.x86"=>17,
 "nvidia3.a30.large"=>3,
 "n3.xlarge.opt-m4"=>4,
 "storage.dell"=>14,
 "nvidia3.a40.medium"=>9,
 "w3amd.7402p.256.8160"=>1,
 "a4.lg"=>5,
 "a3.large.x86"=>1,
 "x.large.arm"=>1,
 "w3amd.75xx24c.256.4320"=>1,
 "npi.testing"=>1,
 "m3.large.opt-c2m4"=>1,
 "a3.large.opt-s4a1"=>1,
 "w3amd.75xx24c.256.8160"=>1,
 "c3.large.arm64"=>2}

What hardware is missing plan information?

# Affected hardware that has no plan attached.
# The trailing `; nil` keeps the console from echoing the whole collection.
missing_plan = affected_servers.reject { |server| server.plan.present? }; nil
irb(main):289:0> missing_plan.pluck(:id, :type, :state)
=>
[["2556229f-3da0-4056-96dc-ce820af30ba3", "Hardware::Server", "enrolled"],
 ["4ca367f4-33c2-494f-8227-bed6c0d8bd8d", "Hardware::Server", "enrolled"],
 ["8504ffdf-24d7-453f-9a49-94a7cba3f9ae", "Hardware::StorageAppliance", "enrolled"],
 ["8b383a51-2a45-4d02-aafa-f31b159e31b6", "Hardware::Server", "enrolled"],
 ["a20a6442-7185-4c49-bfcf-5359fe22cd9f", "Hardware::StorageAppliance", "enrolled"],
 ["e2ff6fec-a70a-42e6-afb1-93f57c6a30f1", "Hardware::Server", "enrolled"],
 ["f9670617-0cde-4db6-94de-d7ec495881e7", "Hardware::StorageAppliance", "enrolled"]]

I think it's safe to not worry about these because customers can't deploy them yet.

What hardware plan versions don't have the required CPU information?

  # Reports whether the hardware's plan version carries the CPU fields checked here.
  #
  # Returns false when the plan version or its specs["cpus"] entry is absent;
  # otherwise returns true only if the first "cpus" entry has both a "cores"
  # and a "count" key.
  def valid_cpu_data?(hardware)
    plan_version = hardware.plan_version
    return false unless plan_version.present? && plan_version.specs["cpus"].present?

    first_cpu = plan_version.specs["cpus"][0]
    %w[cores count].all? { |key| first_cpu.key?(key) }
  end

  # Count, per plan-version slug, the affected hardware that fails
  # valid_cpu_data?. Hardware without a plan version is skipped entirely.
  affected_plan_versions = Hash.new(0)
  affected_servers.each do |server|
    plan_version = server.plan_version
    next if !plan_version.present? || valid_cpu_data?(server)

    affected_plan_versions[plan_version.slug] += 1
  end; nil

These are the ones that are currently not being billed properly

  # IDs of the entries that are currently not being billed properly.
  broken_billables = %w[
    39b7f377-af6d-437b-a99b-10d9d4fd7b53
    d2deb4c8-446f-4679-a7f5-60edf7745e23
    e9c50e27-9f74-477b-9210-0e277537a336
    88a6bf4a-b63e-4c7e-8c20-5d5949ba62f9
    5b205c53-af64-421e-b2b6-39f5923d4f3f
    604c38d9-1f8c-4600-bb29-a0d5e1aa504a
    d4914c80-c657-4ff2-86a1-8f41d90af0a9
    f6f087f3-3e7c-457f-8943-a6864a8a0b97
    88d2e8ee-6ec1-450b-9982-63d8220a1011
    a47f38f9-c2ac-46ba-bb16-68e659b89183
    e47a3d2e-13a0-444a-8164-ebe54fbc43b1
    840ce4fd-a300-4a7b-96a3-140e0bf988b4
    68e0feb1-8146-4b08-a591-15806a0f61a0
    0e1ef1c6-2de7-40b3-91e0-44474f32fafb
    161d4f10-4362-4028-b237-b7649f87eb09
  ]

Do these pieces of hardware have the information I need to fix the data?

  # Checks that the hardware's plan version has usable CPU data.
  #
  # Returns true only when the plan version is present, its specs["cpus"] is
  # present, and the first entry of that list has both "cores" and "count"
  # keys; returns false in every other case.
  def my_valid_cpu_data?(hardware)
    plan_version = hardware.plan_version
    return false unless plan_version.present?

    cpus = plan_version.specs["cpus"]
    return false unless cpus.present?

    cpu_entry = cpus[0]
    cpu_entry.key?("cores") && cpu_entry.key?("count")
  end

  # Subset of broken_hardware that has a plan version and passes
  # my_valid_cpu_data?. The trailing `; nil` keeps the console output short.
  can_be_fixed = broken_hardware.select do |hardware|
    hardware.plan_version.present? && my_valid_cpu_data?(hardware)
  end; nil

Actually fix the components

  # Builds a ProcessorComponent for one piece of hardware from a CPU spec hash.
  # The component is only instantiated and populated here; nothing in this
  # method saves it.
  #
  # h_id     - value assigned to the component's hardware_id.
  # cpu_data - hash read for "name", "manufacturer", "model", "speed" and "cores".
  # index    - interpolated into the placeholder serial ("CPU<index>").
  #
  # Returns the populated ProcessorComponent.
  def create_processor_component(h_id, cpu_data, index)
    ProcessorComponent.new.tap do |component|
      component.name = cpu_data["name"]
      component.type = ProcessorComponent.to_s
      component.vendor = cpu_data["manufacturer"]
      component.model = cpu_data["model"]
      component.serial = "CPU#{index}"
      component.firmware_version = "N/A"
      component.data = { "clock" => cpu_data["speed"], "cores" => cpu_data["cores"] }
      component.hardware_id = h_id
    end
  end
  # NOTE(review): unfinished draft. The loop below stops mid-statement: the
  # create_processor_component call has an empty argument and is never closed,
  # and the each_with_index block has no matching `end`, so this will not
  # parse as written.
  cant_fix = []
  finished = []
  broken_hardware.each_with_index do |h, i|
    # Hardware without a plan version or without "cpus" data is set aside.
    # NOTE(review): other snippets in these notes read CPU data through
    # plan_version.specs["cpus"]; here `specs` is omitted. Confirm which
    # lookup is intended.
    unless h.plan_version.present? && h.plan_version["cpus"].present?
      cant_fix << h
      next
    end

    cpu_data = h.plan_version["cpus"][0]
    # NOTE(review): core_count is assigned the hardware record itself; this
    # looks like an unfinished expression. TODO confirm the intended value.
    core_count = h
    c = create_processor_component(h.id, ,
  "04af7a5f-6330-4095-b525-ea8a596db035"
  "111fc3d1-7002-4c22-9d29-e2539c610bb1"
  "15a4071c-ddd9-4fc5-b9b9-35d5831a9de3"
  "19798268-39ca-454e-a7de-cab1a9cae4a5"
  "1df18ad3-3189-4b87-9654-7d9b062d553d"
  "20388df4-c645-445c-8563-114213c85604"
  "2cafd1cc-a6ba-4caf-849d-969ac22eddca"
  "2cc2596e-8045-49ea-8274-5b84e27a643c"
  "2d4941a3-f0ce-454c-b9dc-6f5bf3381519"
  "2e13125c-9794-4392-ab7c-0dbb10b3b4f7"
  "2e24c7dc-a219-45c2-ae79-1aa0eb367d56"
  "2ffa9123-6466-49a3-ac81-84a7e0dcb437"
  "35d423fa-e119-4c9b-8eed-9193a4037b18"
  "39888ace-88cb-49f8-8eef-f1ec14c36d2c"
  "4470e1bc-0c1e-47ac-99e0-8f23cc075228"
  "56c91002-4e8e-4ab8-b653-d8fb459ad186"
  "59daefde-f2c2-42c2-8bc9-90d5a00e98e9"
  "5bf121bf-1b11-429b-9f73-11206e9f438c"
  "5f81d1f6-9c7d-41b0-bb02-a4cb5b31b1ab"
  "613d4464-8c0b-44a8-8bcc-9ece50b17ce5"
  "62e344ed-2fe1-4778-92e8-0dd386cf0590"
  "630cf74d-d689-496c-b29f-5f094c4455d5"
  "649fa2b1-675c-4433-9256-e7632092ab8a"
  "66f1ef27-3310-40c3-8d06-6c889ddc1e15"
  "6ac54a10-c47d-446d-8ef5-d4131bdc746c"
  "6c7e5828-68fe-4114-a8e8-1e3ce9747de0"
  "773240e6-7f9b-472f-847e-0a9f914e4493"
  "77f9ba1e-bcd4-46c9-963a-b861fb573ab2"
  "7fe941fd-4533-411e-93ad-832632910cf2"
  "858a0e53-56ec-4b77-b852-8371f3ead1bd"
  "85b1ab1b-664b-4d0f-855e-30ccf7f16f50"
  "921d04e8-b7b8-4e13-a9f3-f55302d970c1"
  "9430eb5e-fbe2-48b0-b180-d94347a5f296"
  "a172606f-4d90-41b8-a1f1-0cd1b20aaa7f"
  "a8f5d150-0f5b-4a92-9583-0e70473a9b8b"
  "a96d685d-b16f-4852-84dd-dd3304b37471"
  "aa1f836f-5808-452a-a5bc-884acd3bcd90"
  "abc678fd-d92b-4fc9-ad46-bc6316c170c6"
  "afce0857-1016-4638-91bf-f67ee9ade423"
  "b37bae11-645f-45cf-b55e-20604b5f3030"
  "d263768d-c460-4bdd-81fa-c04fe80122cc"
  "d4849d7e-8b68-4f14-97f6-0682c20d4706"
  "d634a3b5-98ef-4eca-8fe3-3bc4903170c9"
  "dff6b6b3-d46e-47c8-8c85-e85f2566893b"
  "e248f2f0-b1dc-4e6e-b025-687ea375fe2d"
  "e9e17d57-f8dd-4f8a-b31b-6e33c8e25078"
  "fa97834e-d71e-4d8f-8fc0-2e8988a05a28"
  "fb6c21a5-8640-4e1e-af18-2790f3a79873"
  1. LicenseActivationID
  2. Licensable (an Instance Model)
  3. PlanVersion
  4. CPU count and CPU cores
  5. Update License.data["cores"] = cpu_count * cpu_cores
  # Recomputes a license's core count from the plan version of the instance it
  # is activated on, and assigns it to license.data["cores"] in memory.
  #
  # this doesn't save the things: the caller has to persist the returned
  # license.
  #
  # license_activation - object responding to #licensable (the instance) and
  #                      #license.
  #
  # Returns the modified, unsaved license on success. Otherwise returns a
  # String naming what was missing: "missing instance", "missing plan_version"
  # or "missing cpu_data".
  def fix_core_count_prime(license_activation)
    instance = license_activation.licensable
    return "missing instance" unless instance.present?
    plan_version = instance.plan_version
    return "missing plan_version" unless plan_version.present?

    # Only index into the "cpus" list when it exists and is non-empty, so a
    # plan version without CPU specs is reported instead of raising on nil[0].
    cpus = plan_version.specs["cpus"]
    cpu_data = cpus[0] if cpus.present?
    return "missing cpu_data" unless cpu_data.present? && cpu_data["cores"] && cpu_data["count"]

    cpu_count = cpu_data["count"].to_i
    cpu_cores = cpu_data["cores"].to_i

    license = license_activation.license
    license.data["cores"] = cpu_count * cpu_cores

    license
  end


  # Run fix_core_count_prime for every ID in broken_license_activations from
  # the third entry onward ([2..] skips the first two). Each element of res is
  # either that method's result or a "couldn't find la ..." message.
  # NOTE(review): with_deleted presumably widens the lookup to soft-deleted
  # rows. Confirm against the model.
  res = broken_license_activations[2..].map do |la_id|
    # find_by returns nil for an unknown ID (find would raise and abort the
    # whole map), and `next` makes the message this element's value rather
    # than attempting to return out of the enclosing console scope.
    la = LicenseActivation.with_deleted.find_by(id: la_id)
    next "couldn't find la #{la_id}" if la.nil?
    fix_core_count_prime(la)
  end

  # Successful results: the entries of res that are License objects.
  res.select { |entry| entry.is_a?(License) }

  # Failures: every entry of res that is not a License.
  res.reject { |entry| entry.is_a?(License) }

Are there any Windows licenses remaining with 0 cores that aren't erroring yet?

  # License activations whose licensee product slug contains "windows", loaded
  # together with their license and licensee product.
  with_associations = LicenseActivation.eager_load(:license).eager_load(:licensee_product)
  activations = with_associations.where("licensee_products.slug LIKE '%windows%'").all

  # Keep only the activations whose license reports exactly 0 cores.
  missing_cores = activations.select { |la| la.license.data["cores"] == 0 }


  # One entry per activation in missing_cores: whatever fix_core_count_prime
  # returns for it.
  fixed_licenses = missing_cores.map { |activation| fix_core_count_prime(activation) }
  irb(main):066:0> LicenseActivation.eager_load(:license).eager_load(:licensee_product).where("licensee_products.slug LIKE '%windows%'").where("(licenses.data->>'cores')::integer = 0").count
  => 0
  irb(main):067:0> LicenseActivation.eager_load(:license).eager_load(:licensee_product).where("licensee_products.slug LIKE '%windows%'").where("(licenses.data->>'cores')::integer > 0").count
  => 538