20130110

Fixing Balance-ALB (Mode 6) Bonding for KVM

I ended up contacting the netdev list, looking to see if the problems I was experiencing with Balance-ALB were fixable and if a fix would be accepted.

Good news!  It was already fixed!

Bad news...it's only fixed in the 3.8 release candidate right now.

The responder pointed me to the patch submission that fixed the issue at hand: with it, Balance-ALB no longer stomps on MACs that did not originate from the host itself.  The patch was simple enough to apply to the 3.0 kernel, but other changes made since then caused one hunk to fail to apply and then the build to fail.  To fix the build, I had to pull in a function from upstream and backport it into one of the headers.  The next challenge was getting the .deb packages built...I made the mistake of doing this on a ramdrive, not realizing it would compile everything three times and generate three images.  24G of ramdrive later, it was done.

The installation, at least, was easy enough...thanks to the .debs.  After rebooting, the bond worked correctly, and the MACs for all my virtual machines are now visible and correct!

For posterity, this is the link I was given for the original patch:

http://git.kernel.org/?p=linux/kernel/git/stable/linux-stable.git;a=patch;h=567b871e503316b0927e54a3d7c86d50b722d955

Below is the patch for the 3.0 kernel.  The patch appears to build for kernels up to (but not including) the 3.7 series.  3.7 should work if you omit the etherdevice.h portion of the patch.

diff -uNr linux-3.0.0-a/drivers/net/bonding/bond_alb.c linux-3.0.0-b/drivers/net/bonding/bond_alb.c
--- linux-3.0.0-a/drivers/net/bonding/bond_alb.c        2013-01-10 12:47:53.000000000 -0500
+++ linux-3.0.0-b/drivers/net/bonding/bond_alb.c        2013-01-10 12:50:58.000000000 -0500
@@ -666,6 +666,12 @@
        struct arp_pkt *arp = arp_pkt(skb);
        struct slave *tx_slave = NULL;

+       /* Don't modify or load balance ARPs that do not originate locally
+        * (e.g.,arrive via a bridge).
+        */
+       if (!bond_slave_has_mac(bond, arp->mac_src))
+               return NULL;
+
        if (arp->op_code == htons(ARPOP_REPLY)) {
                /* the arp must be sent on the selected
                * rx channel
diff -uNr linux-3.0.0-a/drivers/net/bonding/bonding.h linux-3.0.0-b/drivers/net/bonding/bonding.h
--- linux-3.0.0-a/drivers/net/bonding/bonding.h 2011-07-21 22:17:23.000000000 -0400
+++ linux-3.0.0-b/drivers/net/bonding/bonding.h 2013-01-10 12:51:05.000000000 -0500
@@ -18,6 +18,7 @@
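 #include <linux/timer.h>
 #include <linux/proc_fs.h>
 #include <linux/if_bonding.h>
+#include <linux/etherdevice.h>
 #include <linux/cpumask.h>
 #include <linux/in6.h>
 #include <linux/netpoll.h>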
@@ -431,6 +432,18 @@
 }
 #endif

+static inline struct slave *bond_slave_has_mac(struct bonding *bond,
+                                              const u8 *mac)
+{
+       int i = 0;
+       struct slave *tmp;
+
+       bond_for_each_slave(bond, tmp, i)
+               if (ether_addr_equal_64bits(mac, tmp->dev->dev_addr))
+                       return tmp;
+
+       return NULL;
+}

 /* exported from bond_main.c */
 extern int bond_net_id;
diff -uNr linux-3.0.0-a/include/linux/etherdevice.h linux-3.0.0-b/include/linux/etherdevice.h
--- linux-3.0.0-a/include/linux/etherdevice.h   2011-07-21 22:17:23.000000000 -0400
+++ linux-3.0.0-b/include/linux/etherdevice.h   2013-01-10 12:51:16.000000000 -0500
@@ -275,4 +275,37 @@
 #endif
 }

+/**
+ * ether_addr_equal_64bits - Compare two Ethernet addresses
+ * @addr1: Pointer to an array of 8 bytes
+ * @addr2: Pointer to an other array of 8 bytes
+ *
+ * Compare two Ethernet addresses, returns true if equal, false otherwise.
+ *
+ * The function doesn't need any conditional branches and possibly uses
+ * word memory accesses on CPU allowing cheap unaligned memory reads.
+ * arrays = { byte1, byte2, byte3, byte4, byte5, byte6, pad1, pad2 }
+ *
+ * Please note that alignment of addr1 & addr2 are only guaranteed to be 16 bits.
+ */
+
+static inline bool ether_addr_equal_64bits(const u8 addr1[6+2],
+                                           const u8 addr2[6+2])
+{
+#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+        unsigned long fold = ((*(unsigned long *)addr1) ^
+                              (*(unsigned long *)addr2));
+
+        if (sizeof(fold) == 8)
+                return zap_last_2bytes(fold) == 0;
+
+        fold |= zap_last_2bytes((*(unsigned long *)(addr1 + 4)) ^
+                                (*(unsigned long *)(addr2 + 4)));
+        return fold == 0;
+#else
+        return ether_addr_equal(addr1, addr2);
+#endif
+}
+
+
 #endif /* _LINUX_ETHERDEVICE_H */


No comments:

Post a Comment